1 #ifdef PETSC_RCS_HEADER 2 static char vcid[] = "$Id: aijfact.c,v 1.89 1997/09/18 16:47:24 balay Exp bsmith $"; 3 #endif 4 5 #include "src/mat/impls/aij/seq/aij.h" 6 #include "src/vec/vecimpl.h" 7 #include "src/inline/dot.h" 8 9 #undef __FUNC__ 10 #define __FUNC__ "MatOrder_Flow_SeqAIJ" 11 int MatOrder_Flow_SeqAIJ(Mat mat,MatReordering type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 16 } 17 18 /* 19 Factorization code for AIJ format. 20 */ 21 #undef __FUNC__ 22 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 23 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 24 { 25 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 26 IS isicol; 27 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 28 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 29 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 30 31 PetscFunctionBegin; 32 PetscValidHeaderSpecific(isrow,IS_COOKIE); 33 PetscValidHeaderSpecific(iscol,IS_COOKIE); 34 35 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 36 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 37 38 /* get new row pointers */ 39 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 40 ainew[0] = -shift; 41 /* don't know how many column pointers are needed so estimate */ 42 jmax = (int) (f*ai[n]+(!shift)); 43 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 44 /* fill is a linked list of nonzeros in active row */ 45 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill); 46 im = fill + n + 1; 47 /* idnew is location of diagonal in factor */ 48 idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew); 49 idnew[0] = -shift; 50 51 for ( i=0; i<n; i++ ) { 52 /* first copy previous fill into linked list */ 53 nnz = nz = ai[r[i]+1] - ai[r[i]]; 54 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 55 ajtmp = aj + ai[r[i]] + shift; 56 fill[n] = n; 57 while (nz--) { 58 fm = n; 59 idx = ic[*ajtmp++ + shift]; 60 do { 61 m = fm; 62 fm = fill[m]; 63 } while (fm < idx); 64 fill[m] = idx; 65 fill[idx] = fm; 66 } 67 row = fill[n]; 68 while ( row < i ) { 69 ajtmp = ajnew + idnew[row] + (!shift); 70 nzbd = 1 + idnew[row] - ainew[row]; 71 nz = im[row] - nzbd; 72 fm = row; 73 while (nz-- > 0) { 74 idx = *ajtmp++ + shift; 75 nzbd++; 76 if (idx == i) im[row] = nzbd; 77 do { 78 m = fm; 79 fm = fill[m]; 80 } while (fm < idx); 81 if (fm != idx) { 82 fill[m] = idx; 83 fill[idx] = fm; 84 fm = idx; 85 nnz++; 86 } 87 } 88 row = fill[row]; 89 } 90 /* copy new filled row into permanent storage */ 91 ainew[i+1] = ainew[i] + nnz; 92 if (ainew[i+1] > jmax) { 93 /* allocate a longer ajnew */ 94 int maxadd; 95 maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); 96 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 97 jmax += maxadd; 98 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 99 PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int)); 100 PetscFree(ajnew); 101 ajnew = ajtmp; 102 realloc++; /* count how many times we realloc */ 103 } 104 ajtmp = ajnew + ainew[i] + shift; 105 fm = fill[n]; 106 nzi = 0; 107 im[i] = nnz; 108 while (nnz--) { 109 if (fm < i) nzi++; 110 *ajtmp++ = fm - shift; 111 fm = fill[fm]; 112 } 113 idnew[i] = ainew[i] + nzi; 114 } 115 if (ai[n] != 0) { 116 double af = ((double)ainew[n])/((double)ai[n]); 117 PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 118 realloc,f,af); 119 PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 120 PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 121 PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 122 } else { 123 PLogInfo(A,"Info:MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 124 } 125 126 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 127 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 128 129 PetscFree(fill); 130 131 /* put together the new matrix */ 132 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B); CHKERRQ(ierr); 133 PLogObjectParent(*B,isicol); 134 ierr = ISDestroy(isicol); CHKERRQ(ierr); 135 b = (Mat_SeqAIJ *) (*B)->data; 136 PetscFree(b->imax); 137 b->singlemalloc = 0; 138 /* the next line frees the default space generated by the Create() */ 139 PetscFree(b->a); PetscFree(b->ilen); 140 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 141 b->j = ajnew; 142 b->i = ainew; 143 b->diag = idnew; 144 b->ilen = 0; 145 b->imax = 0; 146 b->row = isrow; 147 b->col = iscol; 148 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 149 /* In b structure: Free imax, ilen, old a, old j. 150 Allocate idnew, solve_work, new a, new j */ 151 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 152 b->maxnz = b->nz = ainew[n] + shift; 153 154 (*B)->info.factor_mallocs = realloc; 155 (*B)->info.fill_ratio_given = f; 156 if (ai[i] != 0) { 157 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[i]); 158 } else { 159 (*B)->info.fill_ratio_needed = 0.0; 160 } 161 162 PetscFunctionReturn(0); 163 } 164 /* ----------------------------------------------------------- */ 165 int Mat_AIJ_CheckInode(Mat); 166 167 #undef __FUNC__ 168 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 169 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 170 { 171 Mat C = *B; 172 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 173 IS iscol = b->col, isrow = b->row, isicol; 174 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 175 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 176 int *diag_offset = b->diag,diag,k; 177 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 178 register int *pj; 179 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 180 double ssum; 181 register Scalar *pv, *rtmps,*u_values; 182 183 PetscFunctionBegin; 184 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 185 PLogObjectParent(*B,isicol); 186 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 187 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 188 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) ); CHKPTRQ(rtmp); 189 PetscMemzero(rtmp,(n+1)*sizeof(Scalar)); 190 rtmps = rtmp + shift; ics = ic + shift; 191 192 /* precalcuate row sums */ 193 if (preserve_row_sums) { 194 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) ); CHKPTRQ(rowsums); 195 for ( i=0; i<n; i++ ) { 196 nz = a->i[r[i]+1] - a->i[r[i]]; 197 v = a->a + a->i[r[i]] + shift; 198 sum = 0.0; 199 for ( j=0; j<nz; j++ ) sum += v[j]; 200 rowsums[i] = sum; 201 } 202 } 203 204 for ( i=0; i<n; i++ ) { 205 nz = ai[i+1] - ai[i]; 206 ajtmp = aj + ai[i] + shift; 207 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 208 209 /* load in initial (unfactored row) */ 210 nz = a->i[r[i]+1] - a->i[r[i]]; 211 ajtmpold = a->j + a->i[r[i]] + shift; 212 v = a->a + a->i[r[i]] + shift; 213 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 214 215 row = *ajtmp++ + shift; 216 while (row < i ) { 217 pc = rtmp + row; 218 if (*pc != 0.0) { 219 pv = b->a + diag_offset[row] + shift; 220 pj = b->j + diag_offset[row] + (!shift); 221 multiplier = *pc / *pv++; 222 *pc = multiplier; 223 nz = ai[row+1] - diag_offset[row] - 1; 224 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 225 PLogFlops(2*nz); 226 } 227 row = *ajtmp++ + shift; 228 } 229 /* finished row so stick it into b->a */ 230 pv = b->a + ai[i] + shift; 231 pj = b->j + ai[i] + shift; 232 nz = ai[i+1] - ai[i]; 233 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 234 diag = diag_offset[i] - ai[i]; 235 /* 236 Possibly adjust diagonal entry on current row to force 237 LU matrix to have same row sum as initial matrix. 238 */ 239 if (pv[diag] == 0.0) { 240 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot"); 241 } 242 if (preserve_row_sums) { 243 pj = b->j + ai[i] + shift; 244 sum = rowsums[i]; 245 for ( j=0; j<diag; j++ ) { 246 u_values = b->a + diag_offset[pj[j]] + shift; 247 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 248 inner_sum = 0.0; 249 for ( k=0; k<nz; k++ ) { 250 inner_sum += u_values[k]; 251 } 252 sum -= pv[j]*inner_sum; 253 254 } 255 nz = ai[i+1] - diag_offset[i] - 1; 256 u_values = b->a + diag_offset[i] + 1 + shift; 257 for ( k=0; k<nz; k++ ) { 258 sum -= u_values[k]; 259 } 260 ssum = PetscAbsScalar(sum/pv[diag]); 261 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 262 } 263 /* check pivot entry for current row */ 264 } 265 266 /* invert diagonal entries for simplier triangular solves */ 267 for ( i=0; i<n; i++ ) { 268 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 269 } 270 271 if (preserve_row_sums) PetscFree(rowsums); 272 PetscFree(rtmp); 273 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 274 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 275 ierr = ISDestroy(isicol); CHKERRQ(ierr); 276 C->factor = FACTOR_LU; 277 ierr = Mat_AIJ_CheckInode(C); CHKERRQ(ierr); 278 C->assembled = PETSC_TRUE; 279 PLogFlops(b->n); 280 PetscFunctionReturn(0); 281 } 282 /* ----------------------------------------------------------- */ 283 #undef __FUNC__ 284 #define __FUNC__ "MatLUFactor_SeqAIJ" 285 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 286 { 287 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 288 int ierr; 289 Mat C; 290 291 PetscFunctionBegin; 292 ierr = MatLUFactorSymbolic(A,row,col,f,&C); CHKERRQ(ierr); 293 ierr = MatLUFactorNumeric(A,&C); CHKERRQ(ierr); 294 295 /* free all the data structures from mat */ 296 PetscFree(mat->a); 297 if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);} 298 if (mat->diag) PetscFree(mat->diag); 299 if (mat->ilen) PetscFree(mat->ilen); 300 if (mat->imax) PetscFree(mat->imax); 301 if (mat->solve_work) PetscFree(mat->solve_work); 302 if (mat->inode.size) PetscFree(mat->inode.size); 303 PetscFree(mat); 304 305 PetscMemcpy(A,C,sizeof(struct _p_Mat)); 306 PetscHeaderDestroy(C); 307 PetscFunctionReturn(0); 308 } 309 /* ----------------------------------------------------------- */ 310 #undef __FUNC__ 311 #define __FUNC__ "MatSolve_SeqAIJ" 312 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 313 { 314 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 315 IS iscol = a->col, isrow = a->row; 316 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 317 int nz,shift = a->indexshift,*rout,*cout; 318 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 319 320 PetscFunctionBegin; 321 if (!n) PetscFunctionReturn(0); 322 323 VecGetArray_Fast(bb,b); 324 VecGetArray_Fast(xx,x); 325 tmp = a->solve_work; 326 327 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 328 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 329 330 /* forward solve the lower triangular */ 331 tmp[0] = b[*r++]; 332 tmps = tmp + shift; 333 for ( i=1; i<n; i++ ) { 334 v = aa + ai[i] + shift; 335 vi = aj + ai[i] + shift; 336 nz = a->diag[i] - ai[i]; 337 sum = b[*r++]; 338 while (nz--) sum -= *v++ * tmps[*vi++]; 339 tmp[i] = sum; 340 } 341 342 /* backward solve the upper triangular */ 343 for ( i=n-1; i>=0; i-- ){ 344 v = aa + a->diag[i] + (!shift); 345 vi = aj + a->diag[i] + (!shift); 346 nz = ai[i+1] - a->diag[i] - 1; 347 sum = tmp[i]; 348 while (nz--) sum -= *v++ * tmps[*vi++]; 349 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 350 } 351 352 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 353 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 354 PLogFlops(2*a->nz - a->n); 355 PetscFunctionReturn(0); 356 } 357 358 /* ----------------------------------------------------------- */ 359 #undef __FUNC__ 360 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 361 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 362 { 363 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 364 int i, n = a->m, *vi, *ai = a->i, *aj = a->j,nz, *adiag = a->diag; 365 int ai_i, adiag_i,ierr; 366 Scalar *x,*b, *aa = a->a, sum, *v; 367 368 PetscFunctionBegin; 369 if (!n) PetscFunctionReturn(0); 370 if (a->indexshift) { 371 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 372 PetscFunctionReturn(0); 373 } 374 375 VecGetArray_Fast(bb,b); 376 VecGetArray_Fast(xx,x); 377 378 #if defined(USE_FORTRAN_KERNELS) 379 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 380 #else 381 /* forward solve the lower triangular */ 382 x[0] = b[0]; 383 for ( i=1; i<n; i++ ) { 384 ai_i = ai[i]; 385 v = aa + ai_i; 386 vi = aj + ai_i; 387 nz = adiag[i] - ai_i; 388 sum = b[i]; 389 while (nz--) sum -= *v++ * x[*vi++]; 390 x[i] = sum; 391 } 392 393 /* backward solve the upper triangular */ 394 for ( i=n-1; i>=0; i-- ){ 395 adiag_i = adiag[i]; 396 v = aa + adiag_i + 1; 397 vi = aj + adiag_i + 1; 398 nz = ai[i+1] - adiag_i - 1; 399 sum = x[i]; 400 while (nz--) sum -= *v++ * x[*vi++]; 401 x[i] = sum*aa[adiag_i]; 402 } 403 #endif 404 PLogFlops(2*a->nz - a->n); 405 PetscFunctionReturn(0); 406 } 407 408 #undef __FUNC__ 409 #define __FUNC__ "MatSolveAdd_SeqAIJ" 410 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 411 { 412 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 413 IS iscol = a->col, isrow = a->row; 414 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 415 int nz, shift = a->indexshift,*rout,*cout; 416 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 417 418 PetscFunctionBegin; 419 if (yy != xx) {ierr = VecCopy(yy,xx); CHKERRQ(ierr);} 420 421 VecGetArray_Fast(bb,b); 422 VecGetArray_Fast(xx,x); 423 tmp = a->solve_work; 424 425 ierr = ISGetIndices(isrow,&rout); CHKERRQ(ierr); r = rout; 426 ierr = ISGetIndices(iscol,&cout); CHKERRQ(ierr); c = cout + (n-1); 427 428 /* forward solve the lower triangular */ 429 tmp[0] = b[*r++]; 430 for ( i=1; i<n; i++ ) { 431 v = aa + ai[i] + shift; 432 vi = aj + ai[i] + shift; 433 nz = a->diag[i] - ai[i]; 434 sum = b[*r++]; 435 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 436 tmp[i] = sum; 437 } 438 439 /* backward solve the upper triangular */ 440 for ( i=n-1; i>=0; i-- ){ 441 v = aa + a->diag[i] + (!shift); 442 vi = aj + a->diag[i] + (!shift); 443 nz = ai[i+1] - a->diag[i] - 1; 444 sum = tmp[i]; 445 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 446 tmp[i] = sum*aa[a->diag[i]+shift]; 447 x[*c--] += tmp[i]; 448 } 449 450 ierr = ISRestoreIndices(isrow,&rout); CHKERRQ(ierr); 451 ierr = ISRestoreIndices(iscol,&cout); CHKERRQ(ierr); 452 PLogFlops(2*a->nz); 453 454 PetscFunctionReturn(0); 455 } 456 /* -------------------------------------------------------------------*/ 457 #undef __FUNC__ 458 #define __FUNC__ "MatSolveTrans_SeqAIJ" 459 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx) 460 { 461 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 462 IS iscol = a->col, isrow = a->row, invisrow,inviscol; 463 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 464 int nz,shift = a->indexshift,*rout,*cout; 465 Scalar *x,*b,*tmp, *aa = a->a, *v; 466 467 PetscFunctionBegin; 468 VecGetArray_Fast(bb,b); 469 VecGetArray_Fast(xx,x); 470 tmp = a->solve_work; 471 472 /* invert the permutations */ 473 ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr); 474 ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr); 475 476 ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout; 477 ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout; 478 479 /* copy the b into temp work space according to permutation */ 480 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 481 482 /* forward solve the U^T */ 483 for ( i=0; i<n; i++ ) { 484 v = aa + a->diag[i] + shift; 485 vi = aj + a->diag[i] + (!shift); 486 nz = ai[i+1] - a->diag[i] - 1; 487 tmp[i] *= *v++; 488 while (nz--) { 489 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 490 } 491 } 492 493 /* backward solve the L^T */ 494 for ( i=n-1; i>=0; i-- ){ 495 v = aa + a->diag[i] - 1 + shift; 496 vi = aj + a->diag[i] - 1 + shift; 497 nz = a->diag[i] - ai[i]; 498 while (nz--) { 499 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 500 } 501 } 502 503 /* copy tmp into x according to permutation */ 504 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 505 506 ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr); 507 ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr); 508 ierr = ISDestroy(invisrow); CHKERRQ(ierr); 509 ierr = ISDestroy(inviscol); CHKERRQ(ierr); 510 511 PLogFlops(2*a->nz-a->n); 512 PetscFunctionReturn(0); 513 } 514 515 #undef __FUNC__ 516 #define __FUNC__ "MatSolveTransAdd_SeqAIJ" 517 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 518 { 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 520 IS iscol = a->col, isrow = a->row, invisrow,inviscol; 521 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 522 int nz,shift = a->indexshift, *rout, *cout; 523 Scalar *x,*b,*tmp, *aa = a->a, *v; 524 525 PetscFunctionBegin; 526 if (zz != xx) VecCopy(zz,xx); 527 528 VecGetArray_Fast(bb,b); 529 VecGetArray_Fast(xx,x); 530 tmp = a->solve_work; 531 532 /* invert the permutations */ 533 ierr = ISInvertPermutation(isrow,&invisrow); CHKERRQ(ierr); 534 ierr = ISInvertPermutation(iscol,&inviscol); CHKERRQ(ierr); 535 ierr = ISGetIndices(invisrow,&rout); CHKERRQ(ierr); r = rout; 536 ierr = ISGetIndices(inviscol,&cout); CHKERRQ(ierr); c = cout; 537 538 /* copy the b into temp work space according to permutation */ 539 for ( i=0; i<n; i++ ) tmp[c[i]] = b[i]; 540 541 /* forward solve the U^T */ 542 for ( i=0; i<n; i++ ) { 543 v = aa + a->diag[i] + shift; 544 vi = aj + a->diag[i] + (!shift); 545 nz = ai[i+1] - a->diag[i] - 1; 546 tmp[i] *= *v++; 547 while (nz--) { 548 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 549 } 550 } 551 552 /* backward solve the L^T */ 553 for ( i=n-1; i>=0; i-- ){ 554 v = aa + a->diag[i] - 1 + shift; 555 vi = aj + a->diag[i] - 1 + shift; 556 nz = a->diag[i] - ai[i]; 557 while (nz--) { 558 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 559 } 560 } 561 562 /* copy tmp into x according to permutation */ 563 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 564 565 ierr = ISRestoreIndices(invisrow,&rout); CHKERRQ(ierr); 566 ierr = ISRestoreIndices(inviscol,&cout); CHKERRQ(ierr); 567 ierr = ISDestroy(invisrow); CHKERRQ(ierr); 568 ierr = ISDestroy(inviscol); CHKERRQ(ierr); 569 570 PLogFlops(2*a->nz); 571 PetscFunctionReturn(0); 572 } 573 /* ----------------------------------------------------------------*/ 574 575 #undef __FUNC__ 576 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 577 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,int levels,Mat *fact) 578 { 579 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 580 IS isicol; 581 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 582 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 583 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0; 584 int incrlev,nnz,i,shift = a->indexshift; 585 PetscTruth col_identity, row_identity; 586 587 PetscFunctionBegin; 588 /* special case that simply copies fill pattern */ 589 ISIdentity(isrow,&row_identity); ISIdentity(iscol,&col_identity); 590 if (levels == 0 && row_identity && col_identity) { 591 ierr = MatConvertSameType_SeqAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr); 592 (*fact)->factor = FACTOR_LU; 593 b = (Mat_SeqAIJ *) (*fact)->data; 594 if (!b->diag) { 595 ierr = MatMarkDiag_SeqAIJ(*fact); CHKERRQ(ierr); 596 } 597 b->row = isrow; 598 b->col = iscol; 599 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 600 (*fact)->ops.solve = MatSolve_SeqAIJ_NaturalOrdering; 601 PetscFunctionReturn(0); 602 } 603 604 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 605 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 606 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 607 608 /* get new row pointers */ 609 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 610 ainew[0] = -shift; 611 /* don't know how many column pointers are needed so estimate */ 612 jmax = (int) (f*(ai[n]+!shift)); 613 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 614 /* ajfill is level of fill for each fill entry */ 615 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 616 /* fill is a linked list of nonzeros in active row */ 617 fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 618 /* im is level for each filled value */ 619 im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 620 /* dloc is location of diagonal in factor */ 621 dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 622 dloc[0] = 0; 623 for ( prow=0; prow<n; prow++ ) { 624 /* first copy previous fill into linked list */ 625 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 626 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 627 xi = aj + ai[r[prow]] + shift; 628 fill[n] = n; 629 while (nz--) { 630 fm = n; 631 idx = ic[*xi++ + shift]; 632 do { 633 m = fm; 634 fm = fill[m]; 635 } while (fm < idx); 636 fill[m] = idx; 637 fill[idx] = fm; 638 im[idx] = 0; 639 } 640 nzi = 0; 641 row = fill[n]; 642 while ( row < prow ) { 643 incrlev = im[row] + 1; 644 nz = dloc[row]; 645 xi = ajnew + ainew[row] + shift + nz; 646 flev = ajfill + ainew[row] + shift + nz + 1; 647 nnz = ainew[row+1] - ainew[row] - nz - 1; 648 if (*xi++ + shift != row) { 649 SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot: try running with -pc_ilu_nonzeros_along_diagonal"); 650 } 651 fm = row; 652 while (nnz-- > 0) { 653 idx = *xi++ + shift; 654 if (*flev + incrlev > levels) { 655 flev++; 656 continue; 657 } 658 do { 659 m = fm; 660 fm = fill[m]; 661 } while (fm < idx); 662 if (fm != idx) { 663 im[idx] = *flev + incrlev; 664 fill[m] = idx; 665 fill[idx] = fm; 666 fm = idx; 667 nzf++; 668 } 669 else { 670 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 671 } 672 flev++; 673 } 674 row = fill[row]; 675 nzi++; 676 } 677 /* copy new filled row into permanent storage */ 678 ainew[prow+1] = ainew[prow] + nzf; 679 if (ainew[prow+1] > jmax-shift) { 680 /* allocate a longer ajnew */ 681 int maxadd; 682 maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); 683 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 684 jmax += maxadd; 685 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 686 PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int)); 687 PetscFree(ajnew); 688 ajnew = xi; 689 /* allocate a longer ajfill */ 690 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 691 PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int)); 692 PetscFree(ajfill); 693 ajfill = xi; 694 realloc++; 695 } 696 xi = ajnew + ainew[prow] + shift; 697 flev = ajfill + ainew[prow] + shift; 698 dloc[prow] = nzi; 699 fm = fill[n]; 700 while (nzf--) { 701 *xi++ = fm - shift; 702 *flev++ = im[fm]; 703 fm = fill[fm]; 704 } 705 } 706 PetscFree(ajfill); 707 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 708 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 709 ierr = ISDestroy(isicol); CHKERRQ(ierr); 710 PetscFree(fill); PetscFree(im); 711 712 { 713 double af = ((double)ainew[n])/((double)ai[n]); 714 PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 715 realloc,f,af); 716 PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 717 PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 718 PLogInfo(A,"Info:MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 719 } 720 721 /* put together the new matrix */ 722 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact); CHKERRQ(ierr); 723 b = (Mat_SeqAIJ *) (*fact)->data; 724 PetscFree(b->imax); 725 b->singlemalloc = 0; 726 len = (ainew[n] + shift)*sizeof(Scalar); 727 /* the next line frees the default space generated by the Create() */ 728 PetscFree(b->a); PetscFree(b->ilen); 729 b->a = (Scalar *) PetscMalloc( len+1 ); CHKPTRQ(b->a); 730 b->j = ajnew; 731 b->i = ainew; 732 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 733 b->diag = dloc; 734 b->ilen = 0; 735 b->imax = 0; 736 b->row = isrow; 737 b->col = iscol; 738 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar)); 739 CHKPTRQ(b->solve_work); 740 /* In b structure: Free imax, ilen, old a, old j. 741 Allocate dloc, solve_work, new a, new j */ 742 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 743 b->maxnz = b->nz = ainew[n] + shift; 744 (*fact)->factor = FACTOR_LU; 745 746 (*fact)->info.factor_mallocs = realloc; 747 (*fact)->info.fill_ratio_given = f; 748 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 749 750 PetscFunctionReturn(0); 751 } 752 753 754 755 756