1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ_saad" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ_saad(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ; 252 else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 253 (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ; 254 (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 255 (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 267 { 268 PetscFunctionBegin; 269 *flg = PETSC_TRUE; 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 278 { 279 PetscInt n = A->rmap->n; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 284 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 285 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 286 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 287 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 288 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 289 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 290 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 291 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 292 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 293 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 294 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 295 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 296 (*B)->factor = ftype; 297 PetscFunctionReturn(0); 298 } 299 EXTERN_C_END 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 303 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 304 { 305 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 306 IS isicol; 307 PetscErrorCode ierr; 308 const PetscInt *r,*ic; 309 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 310 PetscInt *bi,*bj,*ajtmp; 311 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 312 PetscReal f; 313 PetscInt nlnk,*lnk,k,**bi_ptr; 314 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 315 PetscBT lnkbt; 316 317 PetscFunctionBegin; 318 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 319 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 320 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 321 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 322 323 /* get new row pointers */ 324 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 325 bi[0] = 0; 326 327 /* bdiag is location of diagonal in factor */ 328 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 329 bdiag[0] = 0; 330 331 /* linked list for storing column indices of the active row */ 332 nlnk = n + 1; 333 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 334 335 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 336 337 /* initial FreeSpace size is f*(ai[n]+1) */ 338 f = info->fill; 339 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 340 current_space = free_space; 341 342 for (i=0; i<n; i++) { 343 /* copy previous fill into linked list */ 344 nzi = 0; 345 nnz = ai[r[i]+1] - ai[r[i]]; 346 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 347 ajtmp = aj + ai[r[i]]; 348 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 349 nzi += nlnk; 350 351 /* add pivot rows into linked list */ 352 row = lnk[n]; 353 while (row < i) { 354 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 355 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 356 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 357 nzi += nlnk; 358 row = lnk[row]; 359 } 360 bi[i+1] = bi[i] + nzi; 361 im[i] = nzi; 362 363 /* mark bdiag */ 364 nzbd = 0; 365 nnz = nzi; 366 k = lnk[n]; 367 while (nnz-- && k < i){ 368 nzbd++; 369 k = lnk[k]; 370 } 371 bdiag[i] = bi[i] + nzbd; 372 373 /* if free space is not available, make more free space */ 374 if (current_space->local_remaining<nzi) { 375 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 376 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 377 reallocs++; 378 } 379 380 /* copy data into free space, then initialize lnk */ 381 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 382 bi_ptr[i] = current_space->array; 383 current_space->array += nzi; 384 current_space->local_used += nzi; 385 current_space->local_remaining -= nzi; 386 } 387 #if defined(PETSC_USE_INFO) 388 if (ai[n] != 0) { 389 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 390 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 391 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 392 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 393 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 394 } else { 395 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 396 } 397 #endif 398 399 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 400 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 401 402 /* destroy list of free space and other temporary array(s) */ 403 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 404 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 405 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 406 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 407 408 /* put together the new matrix */ 409 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 410 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 411 b = (Mat_SeqAIJ*)(B)->data; 412 b->free_a = PETSC_TRUE; 413 b->free_ij = PETSC_TRUE; 414 b->singlemalloc = PETSC_FALSE; 415 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 416 b->j = bj; 417 b->i = bi; 418 b->diag = bdiag; 419 b->ilen = 0; 420 b->imax = 0; 421 b->row = isrow; 422 b->col = iscol; 423 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 424 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 425 b->icol = isicol; 426 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 427 428 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 429 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 430 b->maxnz = b->nz = bi[n] ; 431 432 (B)->factor = MAT_FACTOR_LU; 433 (B)->info.factor_mallocs = reallocs; 434 (B)->info.fill_ratio_given = f; 435 436 if (ai[n]) { 437 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 438 } else { 439 (B)->info.fill_ratio_needed = 0.0; 440 } 441 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 442 (B)->ops->solve = MatSolve_SeqAIJ; 443 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 444 /* switch to inodes if appropriate */ 445 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 /* 450 Trouble in factorization, should we dump the original matrix? 451 */ 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatFactorDumpMatrix" 454 PetscErrorCode MatFactorDumpMatrix(Mat A) 455 { 456 PetscErrorCode ierr; 457 PetscTruth flg = PETSC_FALSE; 458 459 PetscFunctionBegin; 460 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 461 if (flg) { 462 PetscViewer viewer; 463 char filename[PETSC_MAX_PATH_LEN]; 464 465 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 466 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 467 ierr = MatView(A,viewer);CHKERRQ(ierr); 468 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 469 } 470 PetscFunctionReturn(0); 471 } 472 473 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 474 475 /* ----------------------------------------------------------- */ 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 478 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 479 { 480 Mat C=B; 481 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 482 IS isrow = b->row,isicol = b->icol; 483 PetscErrorCode ierr; 484 const PetscInt *r,*ic,*ics; 485 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 486 PetscInt *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj; 487 MatScalar *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a; 488 PetscReal rs = 0.0; 489 LUShift_Ctx sctx; 490 PetscInt newshift,*ddiag; 491 492 PetscFunctionBegin; 493 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 494 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 495 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 496 ics = ic; 497 498 sctx.shift_top = 0; 499 sctx.nshift_max = 0; 500 sctx.shift_lo = 0; 501 sctx.shift_hi = 0; 502 sctx.shift_fraction = 0; 503 504 /* if both shift schemes are chosen by user, only use info->shiftpd */ 505 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 506 ddiag = a->diag; 507 sctx.shift_top = info->zeropivot; 508 for (i=0; i<n; i++) { 509 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 510 d = (aa)[ddiag[i]]; 511 rs = -PetscAbsScalar(d) - PetscRealPart(d); 512 v = aa+ai[i]; 513 nz = ai[i+1] - ai[i]; 514 for (j=0; j<nz; j++) 515 rs += PetscAbsScalar(v[j]); 516 if (rs>sctx.shift_top) sctx.shift_top = rs; 517 } 518 sctx.shift_top *= 1.1; 519 sctx.nshift_max = 5; 520 sctx.shift_lo = 0.; 521 sctx.shift_hi = 1.; 522 } 523 524 sctx.shift_amount = 0.0; 525 sctx.nshift = 0; 526 do { 527 sctx.lushift = PETSC_FALSE; 528 for (i=0; i<n; i++){ 529 nz = bi[i+1] - bi[i]; 530 bjtmp = bj + bi[i]; 531 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 532 533 /* load in initial (unfactored row) */ 534 nz = ai[r[i]+1] - ai[r[i]]; 535 ajtmp = aj + ai[r[i]]; 536 v = aa + ai[r[i]]; 537 for (j=0; j<nz; j++) { 538 rtmp[ics[ajtmp[j]]] = v[j]; 539 } 540 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 541 542 row = *bjtmp++; 543 while (row < i) { 544 pc = rtmp + row; 545 if (*pc != 0.0) { 546 pv = b->a + diag_offset[row]; 547 pj = b->j + diag_offset[row] + 1; 548 multiplier = *pc / *pv++; 549 *pc = multiplier; 550 nz = bi[row+1] - diag_offset[row] - 1; 551 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 552 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 553 } 554 row = *bjtmp++; 555 } 556 /* finished row so stick it into b->a */ 557 pv = b->a + bi[i] ; 558 pj = b->j + bi[i] ; 559 nz = bi[i+1] - bi[i]; 560 diag = diag_offset[i] - bi[i]; 561 rs = -PetscAbsScalar(pv[diag]); 562 for (j=0; j<nz; j++) { 563 pv[j] = rtmp[pj[j]]; 564 rs += PetscAbsScalar(pv[j]); 565 } 566 567 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 568 sctx.rs = rs; 569 sctx.pv = pv[diag]; 570 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 571 if (newshift == 1) break; 572 } 573 574 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 575 /* 576 * if no shift in this attempt & shifting & started shifting & can refine, 577 * then try lower shift 578 */ 579 sctx.shift_hi = sctx.shift_fraction; 580 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 581 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 582 sctx.lushift = PETSC_TRUE; 583 sctx.nshift++; 584 } 585 } while (sctx.lushift); 586 587 /* invert diagonal entries for simplier triangular solves */ 588 for (i=0; i<n; i++) { 589 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 590 } 591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 594 if (b->inode.use) { 595 C->ops->solve = MatSolve_Inode; 596 } else { 597 PetscTruth row_identity, col_identity; 598 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 599 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 600 if (row_identity && col_identity) { 601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 602 } else { 603 C->ops->solve = MatSolve_SeqAIJ; 604 } 605 } 606 C->ops->solveadd = MatSolveAdd_SeqAIJ; 607 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 608 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 609 C->ops->matsolve = MatMatSolve_SeqAIJ; 610 C->assembled = PETSC_TRUE; 611 C->preallocated = PETSC_TRUE; 612 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 613 if (sctx.nshift){ 614 if (info->shiftpd) { 615 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 616 } else if (info->shiftnz) { 617 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 618 } 619 } 620 PetscFunctionReturn(0); 621 } 622 623 /* 624 This routine implements inplace ILU(0) with row or/and column permutations. 625 Input: 626 A - original matrix 627 Output; 628 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 629 a->j (col index) is permuted by the inverse of colperm, then sorted 630 a->a reordered accordingly with a->j 631 a->diag (ptr to diagonal elements) is updated. 632 */ 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 635 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 636 { 637 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 638 IS isrow = a->row,isicol = a->icol; 639 PetscErrorCode ierr; 640 const PetscInt *r,*ic,*ics; 641 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 642 PetscInt *ajtmp,nz,row; 643 PetscInt *diag = a->diag,nbdiag,*pj; 644 PetscScalar *rtmp,*pc,multiplier,d; 645 MatScalar *v,*pv; 646 PetscReal rs; 647 LUShift_Ctx sctx; 648 PetscInt newshift; 649 650 PetscFunctionBegin; 651 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 652 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 653 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 654 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 655 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 656 ics = ic; 657 658 sctx.shift_top = 0; 659 sctx.nshift_max = 0; 660 sctx.shift_lo = 0; 661 sctx.shift_hi = 0; 662 sctx.shift_fraction = 0; 663 664 /* if both shift schemes are chosen by user, only use info->shiftpd */ 665 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 666 sctx.shift_top = 0; 667 for (i=0; i<n; i++) { 668 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 669 d = (a->a)[diag[i]]; 670 rs = -PetscAbsScalar(d) - PetscRealPart(d); 671 v = a->a+ai[i]; 672 nz = ai[i+1] - ai[i]; 673 for (j=0; j<nz; j++) 674 rs += PetscAbsScalar(v[j]); 675 if (rs>sctx.shift_top) sctx.shift_top = rs; 676 } 677 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 678 sctx.shift_top *= 1.1; 679 sctx.nshift_max = 5; 680 sctx.shift_lo = 0.; 681 sctx.shift_hi = 1.; 682 } 683 684 sctx.shift_amount = 0; 685 sctx.nshift = 0; 686 do { 687 sctx.lushift = PETSC_FALSE; 688 for (i=0; i<n; i++){ 689 /* load in initial unfactored row */ 690 nz = ai[r[i]+1] - ai[r[i]]; 691 ajtmp = aj + ai[r[i]]; 692 v = a->a + ai[r[i]]; 693 /* sort permuted ajtmp and values v accordingly */ 694 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 695 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 696 697 diag[r[i]] = ai[r[i]]; 698 for (j=0; j<nz; j++) { 699 rtmp[ajtmp[j]] = v[j]; 700 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 701 } 702 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 703 704 row = *ajtmp++; 705 while (row < i) { 706 pc = rtmp + row; 707 if (*pc != 0.0) { 708 pv = a->a + diag[r[row]]; 709 pj = aj + diag[r[row]] + 1; 710 711 multiplier = *pc / *pv++; 712 *pc = multiplier; 713 nz = ai[r[row]+1] - diag[r[row]] - 1; 714 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 715 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 716 } 717 row = *ajtmp++; 718 } 719 /* finished row so overwrite it onto a->a */ 720 pv = a->a + ai[r[i]] ; 721 pj = aj + ai[r[i]] ; 722 nz = ai[r[i]+1] - ai[r[i]]; 723 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 724 725 rs = 0.0; 726 for (j=0; j<nz; j++) { 727 pv[j] = rtmp[pj[j]]; 728 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 729 } 730 731 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 732 sctx.rs = rs; 733 sctx.pv = pv[nbdiag]; 734 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 735 if (newshift == 1) break; 736 } 737 738 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 739 /* 740 * if no shift in this attempt & shifting & started shifting & can refine, 741 * then try lower shift 742 */ 743 sctx.shift_hi = sctx.shift_fraction; 744 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 745 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 746 sctx.lushift = PETSC_TRUE; 747 sctx.nshift++; 748 } 749 } while (sctx.lushift); 750 751 /* invert diagonal entries for simplier triangular solves */ 752 for (i=0; i<n; i++) { 753 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 754 } 755 756 ierr = PetscFree(rtmp);CHKERRQ(ierr); 757 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 758 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 759 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 760 A->ops->solveadd = MatSolveAdd_SeqAIJ; 761 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 762 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 763 A->assembled = PETSC_TRUE; 764 A->preallocated = PETSC_TRUE; 765 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 766 if (sctx.nshift){ 767 if (info->shiftpd) { 768 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 769 } else if (info->shiftnz) { 770 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 771 } 772 } 773 PetscFunctionReturn(0); 774 } 775 776 /* ----------------------------------------------------------- */ 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatLUFactor_SeqAIJ" 779 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 780 { 781 PetscErrorCode ierr; 782 Mat C; 783 784 PetscFunctionBegin; 785 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 786 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 787 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 788 A->ops->solve = C->ops->solve; 789 A->ops->solvetranspose = C->ops->solvetranspose; 790 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 791 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 /* ----------------------------------------------------------- */ 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatSolve_SeqAIJ" 798 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 799 { 800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 801 IS iscol = a->col,isrow = a->row; 802 PetscErrorCode ierr; 803 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 804 PetscInt nz; 805 const PetscInt *rout,*cout,*r,*c; 806 PetscScalar *x,*tmp,*tmps,sum; 807 const PetscScalar *b; 808 const MatScalar *aa = a->a,*v; 809 810 PetscFunctionBegin; 811 if (!n) PetscFunctionReturn(0); 812 813 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 814 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 815 tmp = a->solve_work; 816 817 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 818 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 819 820 /* forward solve the lower triangular */ 821 tmp[0] = b[*r++]; 822 tmps = tmp; 823 for (i=1; i<n; i++) { 824 v = aa + ai[i] ; 825 vi = aj + ai[i] ; 826 nz = a->diag[i] - ai[i]; 827 sum = b[*r++]; 828 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 829 tmp[i] = sum; 830 } 831 832 /* backward solve the upper triangular */ 833 for (i=n-1; i>=0; i--){ 834 v = aa + a->diag[i] + 1; 835 vi = aj + a->diag[i] + 1; 836 nz = ai[i+1] - a->diag[i] - 1; 837 sum = tmp[i]; 838 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 839 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 840 } 841 842 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 843 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 844 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 845 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 846 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatMatSolve_SeqAIJ" 852 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 853 { 854 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 855 IS iscol = a->col,isrow = a->row; 856 PetscErrorCode ierr; 857 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 858 PetscInt nz,neq; 859 const PetscInt *rout,*cout,*r,*c; 860 PetscScalar *x,*b,*tmp,*tmps,sum; 861 const MatScalar *aa = a->a,*v; 862 PetscTruth bisdense,xisdense; 863 864 PetscFunctionBegin; 865 if (!n) PetscFunctionReturn(0); 866 867 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 868 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 869 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 870 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 871 872 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 873 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 874 875 tmp = a->solve_work; 876 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 877 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 878 879 for (neq=0; neq<B->cmap->n; neq++){ 880 /* forward solve the lower triangular */ 881 tmp[0] = b[r[0]]; 882 tmps = tmp; 883 for (i=1; i<n; i++) { 884 v = aa + ai[i] ; 885 vi = aj + ai[i] ; 886 nz = a->diag[i] - ai[i]; 887 sum = b[r[i]]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 tmp[i] = sum; 890 } 891 /* backward solve the upper triangular */ 892 for (i=n-1; i>=0; i--){ 893 v = aa + a->diag[i] + 1; 894 vi = aj + a->diag[i] + 1; 895 nz = ai[i+1] - a->diag[i] - 1; 896 sum = tmp[i]; 897 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 898 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 899 } 900 901 b += n; 902 x += n; 903 } 904 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 905 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 906 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 907 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 908 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 909 PetscFunctionReturn(0); 910 } 911 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 914 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 915 { 916 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 917 IS iscol = a->col,isrow = a->row; 918 PetscErrorCode ierr; 919 const PetscInt *r,*c,*rout,*cout; 920 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 921 PetscInt nz,row; 922 PetscScalar *x,*b,*tmp,*tmps,sum; 923 const MatScalar *aa = a->a,*v; 924 925 PetscFunctionBegin; 926 if (!n) PetscFunctionReturn(0); 927 928 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 929 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 930 tmp = a->solve_work; 931 932 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 933 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 934 935 /* forward solve the lower triangular */ 936 tmp[0] = b[*r++]; 937 tmps = tmp; 938 for (row=1; row<n; row++) { 939 i = rout[row]; /* permuted row */ 940 v = aa + ai[i] ; 941 vi = aj + ai[i] ; 942 nz = a->diag[i] - ai[i]; 943 sum = b[*r++]; 944 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 945 tmp[row] = sum; 946 } 947 948 /* backward solve the upper triangular */ 949 for (row=n-1; row>=0; row--){ 950 i = rout[row]; /* permuted row */ 951 v = aa + a->diag[i] + 1; 952 vi = aj + a->diag[i] + 1; 953 nz = ai[i+1] - a->diag[i] - 1; 954 sum = tmp[row]; 955 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 956 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 957 } 958 959 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 960 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 961 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 962 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 963 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 964 PetscFunctionReturn(0); 965 } 966 967 /* ----------------------------------------------------------- */ 968 #undef __FUNCT__ 969 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 970 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 971 { 972 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscInt n = A->rmap->n; 975 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 976 PetscScalar *x; 977 const PetscScalar *b; 978 const MatScalar *aa = a->a; 979 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 980 PetscInt adiag_i,i,nz,ai_i; 981 const MatScalar *v; 982 PetscScalar sum; 983 #endif 984 985 PetscFunctionBegin; 986 if (!n) PetscFunctionReturn(0); 987 988 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 991 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 992 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 993 #else 994 /* forward solve the lower triangular */ 995 x[0] = b[0]; 996 for (i=1; i<n; i++) { 997 ai_i = ai[i]; 998 v = aa + ai_i; 999 vi = aj + ai_i; 1000 nz = adiag[i] - ai_i; 1001 sum = b[i]; 1002 while (nz--) sum -= *v++ * x[*vi++]; 1003 x[i] = sum; 1004 } 1005 1006 /* backward solve the upper triangular */ 1007 for (i=n-1; i>=0; i--){ 1008 adiag_i = adiag[i]; 1009 v = aa + adiag_i + 1; 1010 vi = aj + adiag_i + 1; 1011 nz = ai[i+1] - adiag_i - 1; 1012 sum = x[i]; 1013 while (nz--) sum -= *v++ * x[*vi++]; 1014 x[i] = sum*aa[adiag_i]; 1015 } 1016 #endif 1017 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1018 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1019 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1020 PetscFunctionReturn(0); 1021 } 1022 1023 #undef __FUNCT__ 1024 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1025 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1026 { 1027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1028 IS iscol = a->col,isrow = a->row; 1029 PetscErrorCode ierr; 1030 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1031 PetscInt nz; 1032 const PetscInt *rout,*cout,*r,*c; 1033 PetscScalar *x,*b,*tmp,sum; 1034 const MatScalar *aa = a->a,*v; 1035 1036 PetscFunctionBegin; 1037 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1038 1039 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1040 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1041 tmp = a->solve_work; 1042 1043 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1044 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1045 1046 /* forward solve the lower triangular */ 1047 tmp[0] = b[*r++]; 1048 for (i=1; i<n; i++) { 1049 v = aa + ai[i] ; 1050 vi = aj + ai[i] ; 1051 nz = a->diag[i] - ai[i]; 1052 sum = b[*r++]; 1053 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1054 tmp[i] = sum; 1055 } 1056 1057 /* backward solve the upper triangular */ 1058 for (i=n-1; i>=0; i--){ 1059 v = aa + a->diag[i] + 1; 1060 vi = aj + a->diag[i] + 1; 1061 nz = ai[i+1] - a->diag[i] - 1; 1062 sum = tmp[i]; 1063 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1064 tmp[i] = sum*aa[a->diag[i]]; 1065 x[*c--] += tmp[i]; 1066 } 1067 1068 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1069 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1073 1074 PetscFunctionReturn(0); 1075 } 1076 /* -------------------------------------------------------------------*/ 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1079 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1080 { 1081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1082 IS iscol = a->col,isrow = a->row; 1083 PetscErrorCode ierr; 1084 const PetscInt *rout,*cout,*r,*c; 1085 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1086 PetscInt nz,*diag = a->diag; 1087 PetscScalar *x,*b,*tmp,s1; 1088 const MatScalar *aa = a->a,*v; 1089 1090 PetscFunctionBegin; 1091 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1092 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1093 tmp = a->solve_work; 1094 1095 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1096 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1097 1098 /* copy the b into temp work space according to permutation */ 1099 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1100 1101 /* forward solve the U^T */ 1102 for (i=0; i<n; i++) { 1103 v = aa + diag[i] ; 1104 vi = aj + diag[i] + 1; 1105 nz = ai[i+1] - diag[i] - 1; 1106 s1 = tmp[i]; 1107 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1108 while (nz--) { 1109 tmp[*vi++ ] -= (*v++)*s1; 1110 } 1111 tmp[i] = s1; 1112 } 1113 1114 /* backward solve the L^T */ 1115 for (i=n-1; i>=0; i--){ 1116 v = aa + diag[i] - 1 ; 1117 vi = aj + diag[i] - 1 ; 1118 nz = diag[i] - ai[i]; 1119 s1 = tmp[i]; 1120 while (nz--) { 1121 tmp[*vi-- ] -= (*v--)*s1; 1122 } 1123 } 1124 1125 /* copy tmp into x according to permutation */ 1126 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1127 1128 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1129 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1130 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1132 1133 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1134 PetscFunctionReturn(0); 1135 } 1136 1137 #undef __FUNCT__ 1138 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1139 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1140 { 1141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1142 IS iscol = a->col,isrow = a->row; 1143 PetscErrorCode ierr; 1144 const PetscInt *r,*c,*rout,*cout; 1145 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1146 PetscInt nz,*diag = a->diag; 1147 PetscScalar *x,*b,*tmp; 1148 const MatScalar *aa = a->a,*v; 1149 1150 PetscFunctionBegin; 1151 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1152 1153 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1155 tmp = a->solve_work; 1156 1157 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1158 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1159 1160 /* copy the b into temp work space according to permutation */ 1161 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1162 1163 /* forward solve the U^T */ 1164 for (i=0; i<n; i++) { 1165 v = aa + diag[i] ; 1166 vi = aj + diag[i] + 1; 1167 nz = ai[i+1] - diag[i] - 1; 1168 tmp[i] *= *v++; 1169 while (nz--) { 1170 tmp[*vi++ ] -= (*v++)*tmp[i]; 1171 } 1172 } 1173 1174 /* backward solve the L^T */ 1175 for (i=n-1; i>=0; i--){ 1176 v = aa + diag[i] - 1 ; 1177 vi = aj + diag[i] - 1 ; 1178 nz = diag[i] - ai[i]; 1179 while (nz--) { 1180 tmp[*vi-- ] -= (*v--)*tmp[i]; 1181 } 1182 } 1183 1184 /* copy tmp into x according to permutation */ 1185 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1186 1187 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1188 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1189 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1190 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1191 1192 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1193 PetscFunctionReturn(0); 1194 } 1195 /* ----------------------------------------------------------------*/ 1196 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1197 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1198 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1201 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1202 { 1203 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1204 IS isicol; 1205 PetscErrorCode ierr; 1206 const PetscInt *r,*ic; 1207 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1208 PetscInt *bi,*cols,nnz,*cols_lvl; 1209 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1210 PetscInt i,levels,diagonal_fill; 1211 PetscTruth col_identity,row_identity; 1212 PetscReal f; 1213 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1214 PetscBT lnkbt; 1215 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1216 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1217 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1218 PetscTruth missing; 1219 1220 PetscFunctionBegin; 1221 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1222 f = info->fill; 1223 levels = (PetscInt)info->levels; 1224 diagonal_fill = (PetscInt)info->diagonal_fill; 1225 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1226 1227 /* special case that simply copies fill pattern */ 1228 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1229 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1230 if (!levels && row_identity && col_identity) { 1231 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1232 fact->factor = MAT_FACTOR_ILU; 1233 (fact)->info.factor_mallocs = 0; 1234 (fact)->info.fill_ratio_given = info->fill; 1235 (fact)->info.fill_ratio_needed = 1.0; 1236 b = (Mat_SeqAIJ*)(fact)->data; 1237 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1238 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1239 b->row = isrow; 1240 b->col = iscol; 1241 b->icol = isicol; 1242 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1243 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1244 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1245 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1246 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1251 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1252 1253 /* get new row pointers */ 1254 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1255 bi[0] = 0; 1256 /* bdiag is location of diagonal in factor */ 1257 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1258 bdiag[0] = 0; 1259 1260 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1261 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1262 1263 /* create a linked list for storing column indices of the active row */ 1264 nlnk = n + 1; 1265 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1266 1267 /* initial FreeSpace size is f*(ai[n]+1) */ 1268 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1269 current_space = free_space; 1270 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1271 current_space_lvl = free_space_lvl; 1272 1273 for (i=0; i<n; i++) { 1274 nzi = 0; 1275 /* copy current row into linked list */ 1276 nnz = ai[r[i]+1] - ai[r[i]]; 1277 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1278 cols = aj + ai[r[i]]; 1279 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1280 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1281 nzi += nlnk; 1282 1283 /* make sure diagonal entry is included */ 1284 if (diagonal_fill && lnk[i] == -1) { 1285 fm = n; 1286 while (lnk[fm] < i) fm = lnk[fm]; 1287 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1288 lnk[fm] = i; 1289 lnk_lvl[i] = 0; 1290 nzi++; dcount++; 1291 } 1292 1293 /* add pivot rows into the active row */ 1294 nzbd = 0; 1295 prow = lnk[n]; 1296 while (prow < i) { 1297 nnz = bdiag[prow]; 1298 cols = bj_ptr[prow] + nnz + 1; 1299 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1300 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1301 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1302 nzi += nlnk; 1303 prow = lnk[prow]; 1304 nzbd++; 1305 } 1306 bdiag[i] = nzbd; 1307 bi[i+1] = bi[i] + nzi; 1308 1309 /* if free space is not available, make more free space */ 1310 if (current_space->local_remaining<nzi) { 1311 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1312 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1313 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1314 reallocs++; 1315 } 1316 1317 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1318 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1319 bj_ptr[i] = current_space->array; 1320 bjlvl_ptr[i] = current_space_lvl->array; 1321 1322 /* make sure the active row i has diagonal entry */ 1323 if (*(bj_ptr[i]+bdiag[i]) != i) { 1324 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1325 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1326 } 1327 1328 current_space->array += nzi; 1329 current_space->local_used += nzi; 1330 current_space->local_remaining -= nzi; 1331 current_space_lvl->array += nzi; 1332 current_space_lvl->local_used += nzi; 1333 current_space_lvl->local_remaining -= nzi; 1334 } 1335 1336 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1337 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1338 1339 /* destroy list of free space and other temporary arrays */ 1340 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1341 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1342 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1343 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1344 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1345 1346 #if defined(PETSC_USE_INFO) 1347 { 1348 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1349 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1350 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1351 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1352 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1353 if (diagonal_fill) { 1354 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1355 } 1356 } 1357 #endif 1358 1359 /* put together the new matrix */ 1360 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1361 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1362 b = (Mat_SeqAIJ*)(fact)->data; 1363 b->free_a = PETSC_TRUE; 1364 b->free_ij = PETSC_TRUE; 1365 b->singlemalloc = PETSC_FALSE; 1366 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1367 b->j = bj; 1368 b->i = bi; 1369 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1370 b->diag = bdiag; 1371 b->ilen = 0; 1372 b->imax = 0; 1373 b->row = isrow; 1374 b->col = iscol; 1375 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1376 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1377 b->icol = isicol; 1378 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1379 /* In b structure: Free imax, ilen, old a, old j. 1380 Allocate bdiag, solve_work, new a, new j */ 1381 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1382 b->maxnz = b->nz = bi[n] ; 1383 (fact)->info.factor_mallocs = reallocs; 1384 (fact)->info.fill_ratio_given = f; 1385 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1386 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1387 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1388 PetscFunctionReturn(0); 1389 } 1390 1391 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1392 #undef __FUNCT__ 1393 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1394 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1395 { 1396 Mat C = B; 1397 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1398 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1399 IS ip=b->row,iip = b->icol; 1400 PetscErrorCode ierr; 1401 const PetscInt *rip,*riip; 1402 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1403 PetscInt *ai=a->i,*aj=a->j; 1404 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1405 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1406 PetscReal zeropivot,rs,shiftnz; 1407 PetscReal shiftpd; 1408 ChShift_Ctx sctx; 1409 PetscInt newshift; 1410 PetscTruth perm_identity; 1411 1412 PetscFunctionBegin; 1413 1414 shiftnz = info->shiftnz; 1415 shiftpd = info->shiftpd; 1416 zeropivot = info->zeropivot; 1417 1418 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1419 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1420 1421 /* initialization */ 1422 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1423 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1424 jl = il + mbs; 1425 rtmp = (MatScalar*)(jl + mbs); 1426 1427 sctx.shift_amount = 0; 1428 sctx.nshift = 0; 1429 do { 1430 sctx.chshift = PETSC_FALSE; 1431 for (i=0; i<mbs; i++) { 1432 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1433 } 1434 1435 for (k = 0; k<mbs; k++){ 1436 bval = ba + bi[k]; 1437 /* initialize k-th row by the perm[k]-th row of A */ 1438 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1439 for (j = jmin; j < jmax; j++){ 1440 col = riip[aj[j]]; 1441 if (col >= k){ /* only take upper triangular entry */ 1442 rtmp[col] = aa[j]; 1443 *bval++ = 0.0; /* for in-place factorization */ 1444 } 1445 } 1446 /* shift the diagonal of the matrix */ 1447 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1448 1449 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1450 dk = rtmp[k]; 1451 i = jl[k]; /* first row to be added to k_th row */ 1452 1453 while (i < k){ 1454 nexti = jl[i]; /* next row to be added to k_th row */ 1455 1456 /* compute multiplier, update diag(k) and U(i,k) */ 1457 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1458 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1459 dk += uikdi*ba[ili]; 1460 ba[ili] = uikdi; /* -U(i,k) */ 1461 1462 /* add multiple of row i to k-th row */ 1463 jmin = ili + 1; jmax = bi[i+1]; 1464 if (jmin < jmax){ 1465 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1466 /* update il and jl for row i */ 1467 il[i] = jmin; 1468 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1469 } 1470 i = nexti; 1471 } 1472 1473 /* shift the diagonals when zero pivot is detected */ 1474 /* compute rs=sum of abs(off-diagonal) */ 1475 rs = 0.0; 1476 jmin = bi[k]+1; 1477 nz = bi[k+1] - jmin; 1478 bcol = bj + jmin; 1479 while (nz--){ 1480 rs += PetscAbsScalar(rtmp[*bcol]); 1481 bcol++; 1482 } 1483 1484 sctx.rs = rs; 1485 sctx.pv = dk; 1486 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1487 1488 if (newshift == 1) { 1489 if (!sctx.shift_amount) { 1490 sctx.shift_amount = 1e-5; 1491 } 1492 break; 1493 } 1494 1495 /* copy data into U(k,:) */ 1496 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1497 jmin = bi[k]+1; jmax = bi[k+1]; 1498 if (jmin < jmax) { 1499 for (j=jmin; j<jmax; j++){ 1500 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1501 } 1502 /* add the k-th row into il and jl */ 1503 il[k] = jmin; 1504 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1505 } 1506 } 1507 } while (sctx.chshift); 1508 ierr = PetscFree(il);CHKERRQ(ierr); 1509 1510 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1511 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1512 1513 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1514 if (perm_identity){ 1515 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1516 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1517 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1518 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1519 } else { 1520 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1521 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1522 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1523 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1524 } 1525 1526 C->assembled = PETSC_TRUE; 1527 C->preallocated = PETSC_TRUE; 1528 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1529 if (sctx.nshift){ 1530 if (shiftnz) { 1531 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1532 } else if (shiftpd) { 1533 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1534 } 1535 } 1536 PetscFunctionReturn(0); 1537 } 1538 1539 #undef __FUNCT__ 1540 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1541 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1542 { 1543 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1544 Mat_SeqSBAIJ *b; 1545 PetscErrorCode ierr; 1546 PetscTruth perm_identity,missing; 1547 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1548 const PetscInt *rip,*riip; 1549 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1550 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1551 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1552 PetscReal fill=info->fill,levels=info->levels; 1553 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1554 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1555 PetscBT lnkbt; 1556 IS iperm; 1557 1558 PetscFunctionBegin; 1559 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1560 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1561 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1562 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1563 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1564 1565 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1566 ui[0] = 0; 1567 1568 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1569 if (!levels && perm_identity) { 1570 1571 for (i=0; i<am; i++) { 1572 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1573 } 1574 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1575 cols = uj; 1576 for (i=0; i<am; i++) { 1577 aj = a->j + a->diag[i]; 1578 ncols = ui[i+1] - ui[i]; 1579 for (j=0; j<ncols; j++) *cols++ = *aj++; 1580 } 1581 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1582 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1583 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1584 1585 /* initialization */ 1586 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1587 1588 /* jl: linked list for storing indices of the pivot rows 1589 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1590 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1591 il = jl + am; 1592 uj_ptr = (PetscInt**)(il + am); 1593 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1594 for (i=0; i<am; i++){ 1595 jl[i] = am; il[i] = 0; 1596 } 1597 1598 /* create and initialize a linked list for storing column indices of the active row k */ 1599 nlnk = am + 1; 1600 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1601 1602 /* initial FreeSpace size is fill*(ai[am]+1) */ 1603 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1604 current_space = free_space; 1605 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1606 current_space_lvl = free_space_lvl; 1607 1608 for (k=0; k<am; k++){ /* for each active row k */ 1609 /* initialize lnk by the column indices of row rip[k] of A */ 1610 nzk = 0; 1611 ncols = ai[rip[k]+1] - ai[rip[k]]; 1612 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1613 ncols_upper = 0; 1614 for (j=0; j<ncols; j++){ 1615 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1616 if (riip[i] >= k){ /* only take upper triangular entry */ 1617 ajtmp[ncols_upper] = i; 1618 ncols_upper++; 1619 } 1620 } 1621 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1622 nzk += nlnk; 1623 1624 /* update lnk by computing fill-in for each pivot row to be merged in */ 1625 prow = jl[k]; /* 1st pivot row */ 1626 1627 while (prow < k){ 1628 nextprow = jl[prow]; 1629 1630 /* merge prow into k-th row */ 1631 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1632 jmax = ui[prow+1]; 1633 ncols = jmax-jmin; 1634 i = jmin - ui[prow]; 1635 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1636 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1637 j = *(uj - 1); 1638 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1639 nzk += nlnk; 1640 1641 /* update il and jl for prow */ 1642 if (jmin < jmax){ 1643 il[prow] = jmin; 1644 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1645 } 1646 prow = nextprow; 1647 } 1648 1649 /* if free space is not available, make more free space */ 1650 if (current_space->local_remaining<nzk) { 1651 i = am - k + 1; /* num of unfactored rows */ 1652 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1653 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1654 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1655 reallocs++; 1656 } 1657 1658 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1659 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1660 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1661 1662 /* add the k-th row into il and jl */ 1663 if (nzk > 1){ 1664 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1665 jl[k] = jl[i]; jl[i] = k; 1666 il[k] = ui[k] + 1; 1667 } 1668 uj_ptr[k] = current_space->array; 1669 uj_lvl_ptr[k] = current_space_lvl->array; 1670 1671 current_space->array += nzk; 1672 current_space->local_used += nzk; 1673 current_space->local_remaining -= nzk; 1674 1675 current_space_lvl->array += nzk; 1676 current_space_lvl->local_used += nzk; 1677 current_space_lvl->local_remaining -= nzk; 1678 1679 ui[k+1] = ui[k] + nzk; 1680 } 1681 1682 #if defined(PETSC_USE_INFO) 1683 if (ai[am] != 0) { 1684 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1685 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1686 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1687 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1688 } else { 1689 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1690 } 1691 #endif 1692 1693 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1694 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1695 ierr = PetscFree(jl);CHKERRQ(ierr); 1696 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1697 1698 /* destroy list of free space and other temporary array(s) */ 1699 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1700 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1701 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1702 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1703 1704 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1705 1706 /* put together the new matrix in MATSEQSBAIJ format */ 1707 1708 b = (Mat_SeqSBAIJ*)(fact)->data; 1709 b->singlemalloc = PETSC_FALSE; 1710 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1711 b->j = uj; 1712 b->i = ui; 1713 b->diag = 0; 1714 b->ilen = 0; 1715 b->imax = 0; 1716 b->row = perm; 1717 b->col = perm; 1718 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1719 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1720 b->icol = iperm; 1721 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1722 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1723 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1724 b->maxnz = b->nz = ui[am]; 1725 b->free_a = PETSC_TRUE; 1726 b->free_ij = PETSC_TRUE; 1727 1728 (fact)->info.factor_mallocs = reallocs; 1729 (fact)->info.fill_ratio_given = fill; 1730 if (ai[am] != 0) { 1731 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1732 } else { 1733 (fact)->info.fill_ratio_needed = 0.0; 1734 } 1735 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 #undef __FUNCT__ 1740 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1741 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1742 { 1743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1744 Mat_SeqSBAIJ *b; 1745 PetscErrorCode ierr; 1746 PetscTruth perm_identity; 1747 PetscReal fill = info->fill; 1748 const PetscInt *rip,*riip; 1749 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1750 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1751 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1752 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1753 PetscBT lnkbt; 1754 IS iperm; 1755 1756 PetscFunctionBegin; 1757 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1758 /* check whether perm is the identity mapping */ 1759 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1760 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1761 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1762 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1763 1764 /* initialization */ 1765 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1766 ui[0] = 0; 1767 1768 /* jl: linked list for storing indices of the pivot rows 1769 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1770 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1771 il = jl + am; 1772 cols = il + am; 1773 ui_ptr = (PetscInt**)(cols + am); 1774 for (i=0; i<am; i++){ 1775 jl[i] = am; il[i] = 0; 1776 } 1777 1778 /* create and initialize a linked list for storing column indices of the active row k */ 1779 nlnk = am + 1; 1780 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1781 1782 /* initial FreeSpace size is fill*(ai[am]+1) */ 1783 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1784 current_space = free_space; 1785 1786 for (k=0; k<am; k++){ /* for each active row k */ 1787 /* initialize lnk by the column indices of row rip[k] of A */ 1788 nzk = 0; 1789 ncols = ai[rip[k]+1] - ai[rip[k]]; 1790 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1791 ncols_upper = 0; 1792 for (j=0; j<ncols; j++){ 1793 i = riip[*(aj + ai[rip[k]] + j)]; 1794 if (i >= k){ /* only take upper triangular entry */ 1795 cols[ncols_upper] = i; 1796 ncols_upper++; 1797 } 1798 } 1799 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1800 nzk += nlnk; 1801 1802 /* update lnk by computing fill-in for each pivot row to be merged in */ 1803 prow = jl[k]; /* 1st pivot row */ 1804 1805 while (prow < k){ 1806 nextprow = jl[prow]; 1807 /* merge prow into k-th row */ 1808 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1809 jmax = ui[prow+1]; 1810 ncols = jmax-jmin; 1811 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1812 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1813 nzk += nlnk; 1814 1815 /* update il and jl for prow */ 1816 if (jmin < jmax){ 1817 il[prow] = jmin; 1818 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1819 } 1820 prow = nextprow; 1821 } 1822 1823 /* if free space is not available, make more free space */ 1824 if (current_space->local_remaining<nzk) { 1825 i = am - k + 1; /* num of unfactored rows */ 1826 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1827 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1828 reallocs++; 1829 } 1830 1831 /* copy data into free space, then initialize lnk */ 1832 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1833 1834 /* add the k-th row into il and jl */ 1835 if (nzk-1 > 0){ 1836 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1837 jl[k] = jl[i]; jl[i] = k; 1838 il[k] = ui[k] + 1; 1839 } 1840 ui_ptr[k] = current_space->array; 1841 current_space->array += nzk; 1842 current_space->local_used += nzk; 1843 current_space->local_remaining -= nzk; 1844 1845 ui[k+1] = ui[k] + nzk; 1846 } 1847 1848 #if defined(PETSC_USE_INFO) 1849 if (ai[am] != 0) { 1850 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1851 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1852 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1853 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1854 } else { 1855 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1856 } 1857 #endif 1858 1859 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1860 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1861 ierr = PetscFree(jl);CHKERRQ(ierr); 1862 1863 /* destroy list of free space and other temporary array(s) */ 1864 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1865 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1866 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1867 1868 /* put together the new matrix in MATSEQSBAIJ format */ 1869 1870 b = (Mat_SeqSBAIJ*)(fact)->data; 1871 b->singlemalloc = PETSC_FALSE; 1872 b->free_a = PETSC_TRUE; 1873 b->free_ij = PETSC_TRUE; 1874 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1875 b->j = uj; 1876 b->i = ui; 1877 b->diag = 0; 1878 b->ilen = 0; 1879 b->imax = 0; 1880 b->row = perm; 1881 b->col = perm; 1882 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1883 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1884 b->icol = iperm; 1885 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1886 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1887 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1888 b->maxnz = b->nz = ui[am]; 1889 1890 (fact)->info.factor_mallocs = reallocs; 1891 (fact)->info.fill_ratio_given = fill; 1892 if (ai[am] != 0) { 1893 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1894 } else { 1895 (fact)->info.fill_ratio_needed = 0.0; 1896 } 1897 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1898 PetscFunctionReturn(0); 1899 } 1900 1901 #undef __FUNCT__ 1902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt" 1903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx) 1904 { 1905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1906 PetscErrorCode ierr; 1907 PetscInt n = A->rmap->n; 1908 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 1909 PetscScalar *x,sum; 1910 const PetscScalar *b; 1911 const MatScalar *aa = a->a,*v; 1912 PetscInt i,nz; 1913 1914 PetscFunctionBegin; 1915 if (!n) PetscFunctionReturn(0); 1916 1917 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1919 1920 /* forward solve the lower triangular */ 1921 x[0] = b[0]; 1922 v = aa; 1923 vi = aj; 1924 for (i=1; i<n; i++) { 1925 nz = ai[i+1] - ai[i]; 1926 sum = b[i]; 1927 while (nz--) sum -= *v++ * x[*vi++]; 1928 x[i] = sum; 1929 } 1930 1931 /* backward solve the upper triangular */ 1932 v = aa + adiag[n] + 1; 1933 vi = aj + adiag[n] + 1; 1934 for (i=n-1; i>=0; i--){ 1935 nz = adiag[i] - adiag[i+1] - 1; 1936 sum = x[i]; 1937 while (nz--) sum -= *v++ * x[*vi++]; 1938 x[i] = sum*aa[adiag[i]]; 1939 v++; vi++; 1940 } 1941 1942 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1943 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1944 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1945 PetscFunctionReturn(0); 1946 } 1947 1948 #undef __FUNCT__ 1949 #define __FUNCT__ "MatSolve_SeqAIJ_iludt" 1950 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx) 1951 { 1952 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1953 IS iscol = a->col,isrow = a->row; 1954 PetscErrorCode ierr; 1955 PetscInt i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag; 1956 PetscInt nz; 1957 const PetscInt *rout,*cout,*r,*c; 1958 PetscScalar *x,*tmp,*tmps; 1959 const PetscScalar *b; 1960 const MatScalar *aa = a->a,*v; 1961 1962 PetscFunctionBegin; 1963 if (!n) PetscFunctionReturn(0); 1964 1965 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1966 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1967 tmp = a->solve_work; 1968 1969 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1970 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1971 1972 /* forward solve the lower triangular */ 1973 tmp[0] = b[*r++]; 1974 tmps = tmp; 1975 v = aa; 1976 vi = aj; 1977 for (i=1; i<n; i++) { 1978 nz = ai[i+1] - ai[i]; 1979 tmp[i] = b[*r++]; 1980 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1981 v += nz; vi += nz; 1982 } 1983 1984 /* backward solve the upper triangular */ 1985 v = aa + adiag[n] + 1; 1986 vi = aj + adiag[n] + 1; 1987 for (i=n-1; i>=0; i--){ 1988 nz = adiag[i] - adiag[i+1] - 1; 1989 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1990 x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]]; 1991 v += nz+1; vi += nz+1; 1992 } 1993 1994 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1995 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1996 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1997 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1998 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 #undef __FUNCT__ 2003 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 2004 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 2005 { 2006 Mat B = *fact; 2007 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 2008 IS isicol; 2009 PetscErrorCode ierr; 2010 const PetscInt *r,*ic; 2011 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 2012 PetscInt *bi,*bj,*bdiag; 2013 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 2014 PetscInt nlnk,*lnk; 2015 PetscBT lnkbt; 2016 PetscTruth row_identity,icol_identity,both_identity; 2017 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp; 2018 const PetscInt *ics; 2019 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 2020 PetscReal dt=info->dt,shift=info->shiftinblocks; 2021 PetscInt nnz_max; 2022 PetscTruth missing; 2023 2024 PetscFunctionBegin; 2025 //printf("MatILUDTFactor_SeqAIJ is called...\n"); 2026 /* ------- symbolic factorization, can be reused ---------*/ 2027 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2028 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2029 adiag=a->diag; 2030 2031 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2032 2033 /* bdiag is location of diagonal in factor */ 2034 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 2035 2036 /* allocate row pointers bi */ 2037 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 2038 2039 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 2040 dtcount = (PetscInt)info->dtcount; 2041 if (dtcount > n/2) dtcount = n/2; 2042 nnz_max = ai[n]+2*n*dtcount+2; 2043 ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2044 ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr); 2045 2046 /* put together the new matrix */ 2047 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2048 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 2049 b = (Mat_SeqAIJ*)(B)->data; 2050 b->free_a = PETSC_TRUE; 2051 b->free_ij = PETSC_TRUE; 2052 b->singlemalloc = PETSC_FALSE; 2053 b->a = ba; 2054 b->j = bj; 2055 b->i = bi; 2056 b->diag = bdiag; 2057 b->ilen = 0; 2058 b->imax = 0; 2059 b->row = isrow; 2060 b->col = iscol; 2061 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2062 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2063 b->icol = isicol; 2064 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2065 2066 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2067 b->maxnz = nnz_max; 2068 2069 (B)->factor = MAT_FACTOR_ILUDT; 2070 (B)->info.factor_mallocs = 0; 2071 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 2072 CHKMEMQ; 2073 /* ------- end of symbolic factorization ---------*/ 2074 2075 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2076 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2077 ics = ic; 2078 2079 /* linked list for storing column indices of the active row */ 2080 nlnk = n + 1; 2081 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2082 2083 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 2084 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 2085 jtmp = im + n; 2086 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 2087 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2088 vtmp = rtmp + n; 2089 2090 bi[0] = 0; 2091 bdiag[0] = nnz_max-1; /* location of diagonal in factor B */ 2092 for (i=0; i<n; i++) { 2093 //printf("row %d ...\n",i); 2094 /* copy initial fill into linked list */ 2095 nzi = 0; /* nonzeros for active row i */ 2096 nzi = ai[r[i]+1] - ai[r[i]]; 2097 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 2098 nzi_al = adiag[r[i]] - ai[r[i]]; 2099 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 2100 ajtmp = aj + ai[r[i]]; 2101 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2102 2103 /* load in initial (unfactored row) */ 2104 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2105 aatmp = a->a + ai[r[i]]; 2106 for (j=0; j<nzi; j++) { 2107 rtmp[ics[*ajtmp++]] = *aatmp++; 2108 } 2109 2110 /* add pivot rows into linked list */ 2111 row = lnk[n]; 2112 while (row < i) { 2113 nzi_bl = bi[row+1] - bi[row] + 1; 2114 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 2115 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 2116 nzi += nlnk; 2117 row = lnk[row]; 2118 } 2119 2120 /* copy data from lnk into jtmp, then initialize lnk */ 2121 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 2122 2123 /* numerical factorization */ 2124 //printf("row %d:\n",i); 2125 bjtmp = jtmp; 2126 row = *bjtmp++; /* 1st pivot row */ 2127 while (row < i) { 2128 //printf(" prow %d\n",row); 2129 pc = rtmp + row; 2130 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 2131 multiplier = (*pc) * (*pv); 2132 *pc = multiplier; 2133 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 2134 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2135 pv = ba + bdiag[row+1] + 1; 2136 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 2137 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2138 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2139 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 2140 } 2141 row = *bjtmp++; 2142 } 2143 2144 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 2145 nzi_bl = 0; j = 0; 2146 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 2147 vtmp[j] = rtmp[jtmp[j]]; 2148 nzi_bl++; j++; 2149 } 2150 nzi_bu = nzi - nzi_bl -1; 2151 while (j < nzi){ 2152 vtmp[j] = rtmp[jtmp[j]]; 2153 j++; 2154 } 2155 2156 bjtmp = bj + bi[i]; 2157 batmp = ba + bi[i]; 2158 /* apply level dropping rule to L part */ 2159 ncut = nzi_al + dtcount; 2160 if (ncut < nzi_bl){ 2161 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 2162 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 2163 } else { 2164 ncut = nzi_bl; 2165 } 2166 for (j=0; j<ncut; j++){ 2167 bjtmp[j] = jtmp[j]; 2168 batmp[j] = vtmp[j]; 2169 //printf(" (%d,%g),",bjtmp[j],batmp[j]); 2170 } 2171 bi[i+1] = bi[i] + ncut; 2172 nzi = ncut + 1; 2173 2174 /* apply level dropping rule to U part */ 2175 ncut = nzi_au + dtcount; 2176 if (ncut < nzi_bu){ 2177 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 2178 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 2179 } else { 2180 ncut = nzi_bu; 2181 } 2182 nzi += ncut; 2183 2184 /* mark bdiagonal */ 2185 bdiag[i+1] = bdiag[i] - (ncut + 1); 2186 bjtmp = bj + bdiag[i]; 2187 batmp = ba + bdiag[i]; 2188 *bjtmp = i; 2189 *batmp = rtmp[i]; 2190 if (*batmp == 0.0) *batmp = dt+shift; 2191 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 2192 //printf(" (%d,%g),",*bjtmp,*batmp); 2193 2194 bjtmp = bj + bdiag[i+1]+1; 2195 batmp = ba + bdiag[i+1]+1; 2196 for (k=0; k<ncut; k++){ 2197 bjtmp[k] = jtmp[nzi_bl+1+k]; 2198 batmp[k] = vtmp[nzi_bl+1+k]; 2199 //printf(" (%d,%g),",bjtmp[k],batmp[k]); 2200 } 2201 //printf("\n"); 2202 2203 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 2204 /* 2205 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 2206 printf(" ----------------------------\n"); 2207 */ 2208 } /* for (i=0; i<n; i++) */ 2209 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 2210 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 2211 2212 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2213 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2214 2215 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2216 ierr = PetscFree(im);CHKERRQ(ierr); 2217 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2218 2219 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 2220 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 2221 2222 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2223 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 2224 both_identity = (PetscTruth) (row_identity && icol_identity); 2225 if (row_identity && icol_identity) { 2226 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt; 2227 } else { 2228 B->ops->solve = MatSolve_SeqAIJ_iludt; 2229 } 2230 2231 B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 2232 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 2233 B->ops->solveadd = 0; 2234 B->ops->solvetranspose = 0; 2235 B->ops->solvetransposeadd = 0; 2236 B->ops->matsolve = 0; 2237 B->assembled = PETSC_TRUE; 2238 B->preallocated = PETSC_TRUE; 2239 PetscFunctionReturn(0); 2240 } 2241 2242 /* a wraper of MatILUDTFactor_SeqAIJ() */ 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 2245 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 2246 { 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 //printf("MatILUDTFactorSymbolic_SeqAIJ is called...\n"); 2251 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 2252 2253 fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 2254 PetscFunctionReturn(0); 2255 } 2256 2257 /* 2258 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 2259 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 2260 */ 2261 #undef __FUNCT__ 2262 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 2263 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 2264 { 2265 Mat C=fact; 2266 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 2267 IS isrow = b->row,isicol = b->icol; 2268 PetscErrorCode ierr; 2269 const PetscInt *r,*ic,*ics; 2270 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 2271 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 2272 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 2273 PetscReal dt=info->dt,shift=info->shiftinblocks; 2274 PetscTruth row_identity, col_identity; 2275 2276 PetscFunctionBegin; 2277 //printf("MatILUDTFactorNumeric_SeqAIJ is called...\n"); 2278 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2279 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2280 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2281 ics = ic; 2282 2283 for (i=0; i<n; i++){ 2284 /* initialize rtmp array */ 2285 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 2286 bjtmp = bj + bi[i]; 2287 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 2288 rtmp[i] = 0.0; 2289 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 2290 bjtmp = bj + bdiag[i+1] + 1; 2291 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 2292 2293 /* load in initial unfactored row of A */ 2294 //printf("row %d\n",i); 2295 nz = ai[r[i]+1] - ai[r[i]]; 2296 ajtmp = aj + ai[r[i]]; 2297 v = aa + ai[r[i]]; 2298 for (j=0; j<nz; j++) { 2299 rtmp[ics[*ajtmp++]] = v[j]; 2300 //printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); 2301 } 2302 //printf("\n"); 2303 2304 /* numerical factorization */ 2305 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 2306 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 2307 //printf("%d-: nzl %d\n",i,nzl); 2308 k = 0; 2309 while (k < nzl){ 2310 row = *bjtmp++; 2311 //printf(" prow %d\n",row); 2312 pc = rtmp + row; 2313 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 2314 multiplier = (*pc) * (*pv); 2315 *pc = multiplier; 2316 if (PetscAbsScalar(multiplier) > dt){ 2317 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2318 pv = b->a + bdiag[row+1] + 1; 2319 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2320 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2321 //ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 2322 } 2323 k++; 2324 } 2325 2326 /* finished row so stick it into b->a */ 2327 /* L-part */ 2328 pv = b->a + bi[i] ; 2329 pj = bj + bi[i] ; 2330 nzl = bi[i+1] - bi[i]; 2331 for (j=0; j<nzl; j++) { 2332 pv[j] = rtmp[pj[j]]; 2333 //printf(" (%d,%g),",pj[j],pv[j]); 2334 } 2335 2336 /* diagonal: invert diagonal entries for simplier triangular solves */ 2337 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 2338 b->a[bdiag[i]] = 1.0/rtmp[i]; 2339 //printf(" (%d,%g),",i,b->a[bdiag[i]]); 2340 2341 /* U-part */ 2342 pv = b->a + bdiag[i+1] + 1; 2343 pj = bj + bdiag[i+1] + 1; 2344 nzu = bdiag[i] - bdiag[i+1] - 1; 2345 for (j=0; j<nzu; j++) { 2346 pv[j] = rtmp[pj[j]]; 2347 //printf(" (%d,%g),",pj[j],pv[j]); 2348 } 2349 //printf("\n"); 2350 } 2351 2352 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2353 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2354 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2355 2356 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2357 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 2358 if (row_identity && col_identity) { 2359 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt; 2360 } else { 2361 C->ops->solve = MatSolve_SeqAIJ_iludt; 2362 } 2363 C->ops->solveadd = 0; 2364 C->ops->solvetranspose = 0; 2365 C->ops->solvetransposeadd = 0; 2366 C->ops->matsolve = 0; 2367 C->assembled = PETSC_TRUE; 2368 C->preallocated = PETSC_TRUE; 2369 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 2370 PetscFunctionReturn(0); 2371 } 2372