1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) info->dt = .005; 74 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 75 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 76 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 77 lfill = (PetscInt)(info->dtcount/2.0); 78 jmax = (PetscInt)(info->fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 252 253 PetscFunctionReturn(0); 254 #endif 255 } 256 257 EXTERN_C_BEGIN 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 261 { 262 PetscFunctionBegin; 263 *flg = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 EXTERN_C_END 267 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 272 { 273 PetscInt n = A->rmap->n; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 278 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 279 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 280 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 281 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 282 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 283 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 284 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 285 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 286 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 287 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 288 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 289 (*B)->factor = ftype; 290 PetscFunctionReturn(0); 291 } 292 EXTERN_C_END 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 296 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,MatFactorInfo *info) 297 { 298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 299 IS isicol; 300 PetscErrorCode ierr; 301 const PetscInt *r,*ic; 302 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (B)->factor = MAT_FACTOR_LU; 426 (B)->info.factor_mallocs = reallocs; 427 (B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (B)->info.fill_ratio_needed = 0.0; 433 } 434 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (B)->ops->solve = MatSolve_SeqAIJ; 436 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info) 472 { 473 Mat C=B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 const PetscInt *r,*ic,*ics; 478 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 479 PetscInt *ajtmp,*bjtmp,nz,row; 480 PetscInt *diag_offset = b->diag,diag,*pj; 481 PetscScalar *rtmp,*pc,multiplier,*rtmps; 482 MatScalar *v,*pv; 483 PetscScalar d; 484 PetscReal rs; 485 LUShift_Ctx sctx; 486 PetscInt newshift,*ddiag; 487 488 PetscFunctionBegin; 489 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 490 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 491 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 492 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 493 rtmps = rtmp; ics = ic; 494 495 sctx.shift_top = 0; 496 sctx.nshift_max = 0; 497 sctx.shift_lo = 0; 498 sctx.shift_hi = 0; 499 500 /* if both shift schemes are chosen by user, only use info->shiftpd */ 501 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 502 PetscInt *aai = a->i; 503 ddiag = a->diag; 504 sctx.shift_top = 0; 505 for (i=0; i<n; i++) { 506 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 507 d = (a->a)[ddiag[i]]; 508 rs = -PetscAbsScalar(d) - PetscRealPart(d); 509 v = a->a+aai[i]; 510 nz = aai[i+1] - aai[i]; 511 for (j=0; j<nz; j++) 512 rs += PetscAbsScalar(v[j]); 513 if (rs>sctx.shift_top) sctx.shift_top = rs; 514 } 515 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 516 sctx.shift_top *= 1.1; 517 sctx.nshift_max = 5; 518 sctx.shift_lo = 0.; 519 sctx.shift_hi = 1.; 520 } 521 522 sctx.shift_amount = 0; 523 sctx.nshift = 0; 524 do { 525 sctx.lushift = PETSC_FALSE; 526 for (i=0; i<n; i++){ 527 nz = bi[i+1] - bi[i]; 528 bjtmp = bj + bi[i]; 529 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 530 531 /* load in initial (unfactored row) */ 532 nz = a->i[r[i]+1] - a->i[r[i]]; 533 ajtmp = a->j + a->i[r[i]]; 534 v = a->a + a->i[r[i]]; 535 for (j=0; j<nz; j++) { 536 rtmp[ics[ajtmp[j]]] = v[j]; 537 } 538 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539 540 row = *bjtmp++; 541 while (row < i) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + diag_offset[row]; 545 pj = b->j + diag_offset[row] + 1; 546 multiplier = *pc / *pv++; 547 *pc = multiplier; 548 nz = bi[row+1] - diag_offset[row] - 1; 549 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 550 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 551 } 552 row = *bjtmp++; 553 } 554 /* finished row so stick it into b->a */ 555 pv = b->a + bi[i] ; 556 pj = b->j + bi[i] ; 557 nz = bi[i+1] - bi[i]; 558 diag = diag_offset[i] - bi[i]; 559 rs = 0.0; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmps[pj[j]]; 562 if (j != diag) rs += PetscAbsScalar(pv[j]); 563 } 564 565 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 566 sctx.rs = rs; 567 sctx.pv = pv[diag]; 568 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 569 if (newshift == 1) break; 570 } 571 572 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = info->shift_fraction; 578 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 579 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 580 sctx.lushift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.lushift); 584 585 /* invert diagonal entries for simplier triangular solves */ 586 for (i=0; i<n; i++) { 587 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 588 } 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 592 if (b->inode.use) { 593 C->ops->solve = MatSolve_Inode; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->ops->matsolve = MatMatSolve_SeqAIJ; 601 C->assembled = PETSC_TRUE; 602 C->preallocated = PETSC_TRUE; 603 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 604 if (sctx.nshift){ 605 if (info->shiftpd) { 606 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 607 } else if (info->shiftnz) { 608 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 609 } 610 } 611 PetscFunctionReturn(0); 612 } 613 614 /* 615 This routine implements inplace ILU(0) with row or/and column permutations. 616 Input: 617 A - original matrix 618 Output; 619 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 620 a->j (col index) is permuted by the inverse of colperm, then sorted 621 a->a reordered accordingly with a->j 622 a->diag (ptr to diagonal elements) is updated. 623 */ 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,MatFactorInfo *info) 627 { 628 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 629 IS isrow = a->row,isicol = a->icol; 630 PetscErrorCode ierr; 631 const PetscInt *r,*ic,*ics; 632 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 633 PetscInt *ajtmp,nz,row; 634 PetscInt *diag = a->diag,nbdiag,*pj; 635 PetscScalar *rtmp,*pc,multiplier,d; 636 MatScalar *v,*pv; 637 PetscReal rs; 638 LUShift_Ctx sctx; 639 PetscInt newshift; 640 641 PetscFunctionBegin; 642 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 643 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 644 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 645 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 646 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 647 ics = ic; 648 649 sctx.shift_top = 0; 650 sctx.nshift_max = 0; 651 sctx.shift_lo = 0; 652 sctx.shift_hi = 0; 653 654 /* if both shift schemes are chosen by user, only use info->shiftpd */ 655 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 656 sctx.shift_top = 0; 657 for (i=0; i<n; i++) { 658 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 659 d = (a->a)[diag[i]]; 660 rs = -PetscAbsScalar(d) - PetscRealPart(d); 661 v = a->a+ai[i]; 662 nz = ai[i+1] - ai[i]; 663 for (j=0; j<nz; j++) 664 rs += PetscAbsScalar(v[j]); 665 if (rs>sctx.shift_top) sctx.shift_top = rs; 666 } 667 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 668 sctx.shift_top *= 1.1; 669 sctx.nshift_max = 5; 670 sctx.shift_lo = 0.; 671 sctx.shift_hi = 1.; 672 } 673 674 sctx.shift_amount = 0; 675 sctx.nshift = 0; 676 do { 677 sctx.lushift = PETSC_FALSE; 678 for (i=0; i<n; i++){ 679 /* load in initial unfactored row */ 680 nz = ai[r[i]+1] - ai[r[i]]; 681 ajtmp = aj + ai[r[i]]; 682 v = a->a + ai[r[i]]; 683 /* sort permuted ajtmp and values v accordingly */ 684 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 685 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 686 687 diag[r[i]] = ai[r[i]]; 688 for (j=0; j<nz; j++) { 689 rtmp[ajtmp[j]] = v[j]; 690 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 691 } 692 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 693 694 row = *ajtmp++; 695 while (row < i) { 696 pc = rtmp + row; 697 if (*pc != 0.0) { 698 pv = a->a + diag[r[row]]; 699 pj = aj + diag[r[row]] + 1; 700 701 multiplier = *pc / *pv++; 702 *pc = multiplier; 703 nz = ai[r[row]+1] - diag[r[row]] - 1; 704 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 705 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 706 } 707 row = *ajtmp++; 708 } 709 /* finished row so overwrite it onto a->a */ 710 pv = a->a + ai[r[i]] ; 711 pj = aj + ai[r[i]] ; 712 nz = ai[r[i]+1] - ai[r[i]]; 713 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 714 715 rs = 0.0; 716 for (j=0; j<nz; j++) { 717 pv[j] = rtmp[pj[j]]; 718 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 719 } 720 721 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 722 sctx.rs = rs; 723 sctx.pv = pv[nbdiag]; 724 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 725 if (newshift == 1) break; 726 } 727 728 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 729 /* 730 * if no shift in this attempt & shifting & started shifting & can refine, 731 * then try lower shift 732 */ 733 sctx.shift_hi = info->shift_fraction; 734 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 735 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 736 sctx.lushift = PETSC_TRUE; 737 sctx.nshift++; 738 } 739 } while (sctx.lushift); 740 741 /* invert diagonal entries for simplier triangular solves */ 742 for (i=0; i<n; i++) { 743 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 744 } 745 746 ierr = PetscFree(rtmp);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 748 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 749 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 750 A->ops->solveadd = MatSolveAdd_SeqAIJ; 751 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 752 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 753 A->assembled = PETSC_TRUE; 754 A->preallocated = PETSC_TRUE; 755 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 756 if (sctx.nshift){ 757 if (info->shiftpd) { 758 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 759 } else if (info->shiftnz) { 760 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 761 } 762 } 763 PetscFunctionReturn(0); 764 } 765 766 /* ----------------------------------------------------------- */ 767 #undef __FUNCT__ 768 #define __FUNCT__ "MatLUFactor_SeqAIJ" 769 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 770 { 771 PetscErrorCode ierr; 772 Mat C; 773 774 PetscFunctionBegin; 775 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 776 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 777 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 778 A->ops->solve = C->ops->solve; 779 A->ops->solvetranspose = C->ops->solvetranspose; 780 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 781 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 782 PetscFunctionReturn(0); 783 } 784 /* ----------------------------------------------------------- */ 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatSolve_SeqAIJ" 787 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 788 { 789 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 790 IS iscol = a->col,isrow = a->row; 791 PetscErrorCode ierr; 792 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 793 PetscInt nz; 794 const PetscInt *rout,*cout,*r,*c; 795 PetscScalar *x,*tmp,*tmps,sum; 796 const PetscScalar *b; 797 const MatScalar *aa = a->a,*v; 798 799 PetscFunctionBegin; 800 if (!n) PetscFunctionReturn(0); 801 802 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 803 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 804 tmp = a->solve_work; 805 806 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 807 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 808 809 /* forward solve the lower triangular */ 810 tmp[0] = b[*r++]; 811 tmps = tmp; 812 for (i=1; i<n; i++) { 813 v = aa + ai[i] ; 814 vi = aj + ai[i] ; 815 nz = a->diag[i] - ai[i]; 816 sum = b[*r++]; 817 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 818 tmp[i] = sum; 819 } 820 821 /* backward solve the upper triangular */ 822 for (i=n-1; i>=0; i--){ 823 v = aa + a->diag[i] + 1; 824 vi = aj + a->diag[i] + 1; 825 nz = ai[i+1] - a->diag[i] - 1; 826 sum = tmp[i]; 827 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 828 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 829 } 830 831 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 832 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 833 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 834 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 835 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 836 PetscFunctionReturn(0); 837 } 838 839 #undef __FUNCT__ 840 #define __FUNCT__ "MatMatSolve_SeqAIJ" 841 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 842 { 843 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 844 IS iscol = a->col,isrow = a->row; 845 PetscErrorCode ierr; 846 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 847 PetscInt nz,neq; 848 const PetscInt *rout,*cout,*r,*c; 849 PetscScalar *x,*b,*tmp,*tmps,sum; 850 const MatScalar *aa = a->a,*v; 851 PetscTruth bisdense,xisdense; 852 853 PetscFunctionBegin; 854 if (!n) PetscFunctionReturn(0); 855 856 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 857 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 858 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 859 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 860 861 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 862 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 863 864 tmp = a->solve_work; 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 867 868 for (neq=0; neq<B->cmap->n; neq++){ 869 /* forward solve the lower triangular */ 870 tmp[0] = b[r[0]]; 871 tmps = tmp; 872 for (i=1; i<n; i++) { 873 v = aa + ai[i] ; 874 vi = aj + ai[i] ; 875 nz = a->diag[i] - ai[i]; 876 sum = b[r[i]]; 877 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 878 tmp[i] = sum; 879 } 880 /* backward solve the upper triangular */ 881 for (i=n-1; i>=0; i--){ 882 v = aa + a->diag[i] + 1; 883 vi = aj + a->diag[i] + 1; 884 nz = ai[i+1] - a->diag[i] - 1; 885 sum = tmp[i]; 886 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 887 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 888 } 889 890 b += n; 891 x += n; 892 } 893 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 894 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 895 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 896 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 897 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 903 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 906 IS iscol = a->col,isrow = a->row; 907 PetscErrorCode ierr; 908 const PetscInt *r,*c,*rout,*cout; 909 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 910 PetscInt nz,row; 911 PetscScalar *x,*b,*tmp,*tmps,sum; 912 const MatScalar *aa = a->a,*v; 913 914 PetscFunctionBegin; 915 if (!n) PetscFunctionReturn(0); 916 917 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 tmp = a->solve_work; 920 921 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 922 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 923 924 /* forward solve the lower triangular */ 925 tmp[0] = b[*r++]; 926 tmps = tmp; 927 for (row=1; row<n; row++) { 928 i = rout[row]; /* permuted row */ 929 v = aa + ai[i] ; 930 vi = aj + ai[i] ; 931 nz = a->diag[i] - ai[i]; 932 sum = b[*r++]; 933 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 934 tmp[row] = sum; 935 } 936 937 /* backward solve the upper triangular */ 938 for (row=n-1; row>=0; row--){ 939 i = rout[row]; /* permuted row */ 940 v = aa + a->diag[i] + 1; 941 vi = aj + a->diag[i] + 1; 942 nz = ai[i+1] - a->diag[i] - 1; 943 sum = tmp[row]; 944 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 945 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 946 } 947 948 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 949 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 950 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 953 PetscFunctionReturn(0); 954 } 955 956 /* ----------------------------------------------------------- */ 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 959 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 960 { 961 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 962 PetscErrorCode ierr; 963 PetscInt n = A->rmap->n; 964 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 965 PetscScalar *x; 966 const PetscScalar *b; 967 const MatScalar *aa = a->a; 968 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 969 PetscInt adiag_i,i,nz,ai_i; 970 const MatScalar *v; 971 PetscScalar sum; 972 #endif 973 974 PetscFunctionBegin; 975 if (!n) PetscFunctionReturn(0); 976 977 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 978 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 979 980 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 981 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 982 #else 983 /* forward solve the lower triangular */ 984 x[0] = b[0]; 985 for (i=1; i<n; i++) { 986 ai_i = ai[i]; 987 v = aa + ai_i; 988 vi = aj + ai_i; 989 nz = adiag[i] - ai_i; 990 sum = b[i]; 991 while (nz--) sum -= *v++ * x[*vi++]; 992 x[i] = sum; 993 } 994 995 /* backward solve the upper triangular */ 996 for (i=n-1; i>=0; i--){ 997 adiag_i = adiag[i]; 998 v = aa + adiag_i + 1; 999 vi = aj + adiag_i + 1; 1000 nz = ai[i+1] - adiag_i - 1; 1001 sum = x[i]; 1002 while (nz--) sum -= *v++ * x[*vi++]; 1003 x[i] = sum*aa[adiag_i]; 1004 } 1005 #endif 1006 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1009 PetscFunctionReturn(0); 1010 } 1011 1012 #undef __FUNCT__ 1013 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1014 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1015 { 1016 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1017 IS iscol = a->col,isrow = a->row; 1018 PetscErrorCode ierr; 1019 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1020 PetscInt nz; 1021 const PetscInt *rout,*cout,*r,*c; 1022 PetscScalar *x,*b,*tmp,sum; 1023 const MatScalar *aa = a->a,*v; 1024 1025 PetscFunctionBegin; 1026 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1027 1028 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1029 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1030 tmp = a->solve_work; 1031 1032 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1033 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1034 1035 /* forward solve the lower triangular */ 1036 tmp[0] = b[*r++]; 1037 for (i=1; i<n; i++) { 1038 v = aa + ai[i] ; 1039 vi = aj + ai[i] ; 1040 nz = a->diag[i] - ai[i]; 1041 sum = b[*r++]; 1042 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1043 tmp[i] = sum; 1044 } 1045 1046 /* backward solve the upper triangular */ 1047 for (i=n-1; i>=0; i--){ 1048 v = aa + a->diag[i] + 1; 1049 vi = aj + a->diag[i] + 1; 1050 nz = ai[i+1] - a->diag[i] - 1; 1051 sum = tmp[i]; 1052 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1053 tmp[i] = sum*aa[a->diag[i]]; 1054 x[*c--] += tmp[i]; 1055 } 1056 1057 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1058 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1059 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1060 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1061 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1062 1063 PetscFunctionReturn(0); 1064 } 1065 /* -------------------------------------------------------------------*/ 1066 #undef __FUNCT__ 1067 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1068 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1069 { 1070 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1071 IS iscol = a->col,isrow = a->row; 1072 PetscErrorCode ierr; 1073 const PetscInt *rout,*cout,*r,*c; 1074 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1075 PetscInt nz,*diag = a->diag; 1076 PetscScalar *x,*b,*tmp,s1; 1077 const MatScalar *aa = a->a,*v; 1078 1079 PetscFunctionBegin; 1080 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1081 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1082 tmp = a->solve_work; 1083 1084 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1085 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1086 1087 /* copy the b into temp work space according to permutation */ 1088 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1089 1090 /* forward solve the U^T */ 1091 for (i=0; i<n; i++) { 1092 v = aa + diag[i] ; 1093 vi = aj + diag[i] + 1; 1094 nz = ai[i+1] - diag[i] - 1; 1095 s1 = tmp[i]; 1096 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1097 while (nz--) { 1098 tmp[*vi++ ] -= (*v++)*s1; 1099 } 1100 tmp[i] = s1; 1101 } 1102 1103 /* backward solve the L^T */ 1104 for (i=n-1; i>=0; i--){ 1105 v = aa + diag[i] - 1 ; 1106 vi = aj + diag[i] - 1 ; 1107 nz = diag[i] - ai[i]; 1108 s1 = tmp[i]; 1109 while (nz--) { 1110 tmp[*vi-- ] -= (*v--)*s1; 1111 } 1112 } 1113 1114 /* copy tmp into x according to permutation */ 1115 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1116 1117 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1118 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1119 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1121 1122 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1128 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1129 { 1130 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1131 IS iscol = a->col,isrow = a->row; 1132 PetscErrorCode ierr; 1133 const PetscInt *r,*c,*rout,*cout; 1134 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1135 PetscInt nz,*diag = a->diag; 1136 PetscScalar *x,*b,*tmp; 1137 const MatScalar *aa = a->a,*v; 1138 1139 PetscFunctionBegin; 1140 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1141 1142 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1143 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1144 tmp = a->solve_work; 1145 1146 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1147 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1148 1149 /* copy the b into temp work space according to permutation */ 1150 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1151 1152 /* forward solve the U^T */ 1153 for (i=0; i<n; i++) { 1154 v = aa + diag[i] ; 1155 vi = aj + diag[i] + 1; 1156 nz = ai[i+1] - diag[i] - 1; 1157 tmp[i] *= *v++; 1158 while (nz--) { 1159 tmp[*vi++ ] -= (*v++)*tmp[i]; 1160 } 1161 } 1162 1163 /* backward solve the L^T */ 1164 for (i=n-1; i>=0; i--){ 1165 v = aa + diag[i] - 1 ; 1166 vi = aj + diag[i] - 1 ; 1167 nz = diag[i] - ai[i]; 1168 while (nz--) { 1169 tmp[*vi-- ] -= (*v--)*tmp[i]; 1170 } 1171 } 1172 1173 /* copy tmp into x according to permutation */ 1174 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1175 1176 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1177 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1178 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1180 1181 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1182 PetscFunctionReturn(0); 1183 } 1184 /* ----------------------------------------------------------------*/ 1185 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1186 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1187 1188 #undef __FUNCT__ 1189 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1190 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,MatFactorInfo *info) 1191 { 1192 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1193 IS isicol; 1194 PetscErrorCode ierr; 1195 const PetscInt *r,*ic; 1196 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1197 PetscInt *bi,*cols,nnz,*cols_lvl; 1198 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1199 PetscInt i,levels,diagonal_fill; 1200 PetscTruth col_identity,row_identity; 1201 PetscReal f; 1202 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1203 PetscBT lnkbt; 1204 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1205 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1206 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1207 PetscTruth missing; 1208 1209 PetscFunctionBegin; 1210 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1211 f = info->fill; 1212 levels = (PetscInt)info->levels; 1213 diagonal_fill = (PetscInt)info->diagonal_fill; 1214 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1215 1216 /* special case that simply copies fill pattern */ 1217 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1218 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1219 if (!levels && row_identity && col_identity) { 1220 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1221 fact->factor = MAT_FACTOR_ILU; 1222 (fact)->info.factor_mallocs = 0; 1223 (fact)->info.fill_ratio_given = info->fill; 1224 (fact)->info.fill_ratio_needed = 1.0; 1225 b = (Mat_SeqAIJ*)(fact)->data; 1226 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1227 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1228 b->row = isrow; 1229 b->col = iscol; 1230 b->icol = isicol; 1231 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1232 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1233 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1234 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1235 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1240 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1241 1242 /* get new row pointers */ 1243 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1244 bi[0] = 0; 1245 /* bdiag is location of diagonal in factor */ 1246 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1247 bdiag[0] = 0; 1248 1249 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1250 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1251 1252 /* create a linked list for storing column indices of the active row */ 1253 nlnk = n + 1; 1254 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1255 1256 /* initial FreeSpace size is f*(ai[n]+1) */ 1257 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1258 current_space = free_space; 1259 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1260 current_space_lvl = free_space_lvl; 1261 1262 for (i=0; i<n; i++) { 1263 nzi = 0; 1264 /* copy current row into linked list */ 1265 nnz = ai[r[i]+1] - ai[r[i]]; 1266 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1267 cols = aj + ai[r[i]]; 1268 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1269 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1270 nzi += nlnk; 1271 1272 /* make sure diagonal entry is included */ 1273 if (diagonal_fill && lnk[i] == -1) { 1274 fm = n; 1275 while (lnk[fm] < i) fm = lnk[fm]; 1276 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1277 lnk[fm] = i; 1278 lnk_lvl[i] = 0; 1279 nzi++; dcount++; 1280 } 1281 1282 /* add pivot rows into the active row */ 1283 nzbd = 0; 1284 prow = lnk[n]; 1285 while (prow < i) { 1286 nnz = bdiag[prow]; 1287 cols = bj_ptr[prow] + nnz + 1; 1288 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1289 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1290 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1291 nzi += nlnk; 1292 prow = lnk[prow]; 1293 nzbd++; 1294 } 1295 bdiag[i] = nzbd; 1296 bi[i+1] = bi[i] + nzi; 1297 1298 /* if free space is not available, make more free space */ 1299 if (current_space->local_remaining<nzi) { 1300 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1301 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1302 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1303 reallocs++; 1304 } 1305 1306 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1307 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1308 bj_ptr[i] = current_space->array; 1309 bjlvl_ptr[i] = current_space_lvl->array; 1310 1311 /* make sure the active row i has diagonal entry */ 1312 if (*(bj_ptr[i]+bdiag[i]) != i) { 1313 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1314 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1315 } 1316 1317 current_space->array += nzi; 1318 current_space->local_used += nzi; 1319 current_space->local_remaining -= nzi; 1320 current_space_lvl->array += nzi; 1321 current_space_lvl->local_used += nzi; 1322 current_space_lvl->local_remaining -= nzi; 1323 } 1324 1325 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1326 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1327 1328 /* destroy list of free space and other temporary arrays */ 1329 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1330 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1331 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1332 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1333 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1334 1335 #if defined(PETSC_USE_INFO) 1336 { 1337 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1338 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1339 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1340 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1341 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1342 if (diagonal_fill) { 1343 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1344 } 1345 } 1346 #endif 1347 1348 /* put together the new matrix */ 1349 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1350 b = (Mat_SeqAIJ*)(fact)->data; 1351 b->free_a = PETSC_TRUE; 1352 b->free_ij = PETSC_TRUE; 1353 b->singlemalloc = PETSC_FALSE; 1354 len = (bi[n] )*sizeof(PetscScalar); 1355 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1356 b->j = bj; 1357 b->i = bi; 1358 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1359 b->diag = bdiag; 1360 b->ilen = 0; 1361 b->imax = 0; 1362 b->row = isrow; 1363 b->col = iscol; 1364 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1365 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1366 b->icol = isicol; 1367 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1368 /* In b structure: Free imax, ilen, old a, old j. 1369 Allocate bdiag, solve_work, new a, new j */ 1370 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1371 b->maxnz = b->nz = bi[n] ; 1372 (fact)->info.factor_mallocs = reallocs; 1373 (fact)->info.fill_ratio_given = f; 1374 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1375 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1376 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #include "src/mat/impls/sbaij/seq/sbaij.h" 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1383 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,MatFactorInfo *info) 1384 { 1385 Mat C = B; 1386 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1387 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1388 IS ip=b->row,iip = b->icol; 1389 PetscErrorCode ierr; 1390 const PetscInt *rip,*riip; 1391 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1392 PetscInt *ai=a->i,*aj=a->j; 1393 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1394 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1395 PetscReal zeropivot,rs,shiftnz; 1396 PetscReal shiftpd; 1397 ChShift_Ctx sctx; 1398 PetscInt newshift; 1399 PetscTruth perm_identity; 1400 1401 PetscFunctionBegin; 1402 1403 shiftnz = info->shiftnz; 1404 shiftpd = info->shiftpd; 1405 zeropivot = info->zeropivot; 1406 1407 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1408 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1409 1410 /* initialization */ 1411 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1412 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1413 jl = il + mbs; 1414 rtmp = (MatScalar*)(jl + mbs); 1415 1416 sctx.shift_amount = 0; 1417 sctx.nshift = 0; 1418 do { 1419 sctx.chshift = PETSC_FALSE; 1420 for (i=0; i<mbs; i++) { 1421 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1422 } 1423 1424 for (k = 0; k<mbs; k++){ 1425 bval = ba + bi[k]; 1426 /* initialize k-th row by the perm[k]-th row of A */ 1427 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1428 for (j = jmin; j < jmax; j++){ 1429 col = riip[aj[j]]; 1430 if (col >= k){ /* only take upper triangular entry */ 1431 rtmp[col] = aa[j]; 1432 *bval++ = 0.0; /* for in-place factorization */ 1433 } 1434 } 1435 /* shift the diagonal of the matrix */ 1436 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1437 1438 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1439 dk = rtmp[k]; 1440 i = jl[k]; /* first row to be added to k_th row */ 1441 1442 while (i < k){ 1443 nexti = jl[i]; /* next row to be added to k_th row */ 1444 1445 /* compute multiplier, update diag(k) and U(i,k) */ 1446 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1447 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1448 dk += uikdi*ba[ili]; 1449 ba[ili] = uikdi; /* -U(i,k) */ 1450 1451 /* add multiple of row i to k-th row */ 1452 jmin = ili + 1; jmax = bi[i+1]; 1453 if (jmin < jmax){ 1454 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1455 /* update il and jl for row i */ 1456 il[i] = jmin; 1457 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1458 } 1459 i = nexti; 1460 } 1461 1462 /* shift the diagonals when zero pivot is detected */ 1463 /* compute rs=sum of abs(off-diagonal) */ 1464 rs = 0.0; 1465 jmin = bi[k]+1; 1466 nz = bi[k+1] - jmin; 1467 bcol = bj + jmin; 1468 while (nz--){ 1469 rs += PetscAbsScalar(rtmp[*bcol]); 1470 bcol++; 1471 } 1472 1473 sctx.rs = rs; 1474 sctx.pv = dk; 1475 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1476 1477 if (newshift == 1) { 1478 if (!sctx.shift_amount) { 1479 sctx.shift_amount = 1e-5; 1480 } 1481 break; 1482 } 1483 1484 /* copy data into U(k,:) */ 1485 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1486 jmin = bi[k]+1; jmax = bi[k+1]; 1487 if (jmin < jmax) { 1488 for (j=jmin; j<jmax; j++){ 1489 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1490 } 1491 /* add the k-th row into il and jl */ 1492 il[k] = jmin; 1493 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1494 } 1495 } 1496 } while (sctx.chshift); 1497 ierr = PetscFree(il);CHKERRQ(ierr); 1498 1499 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1500 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1501 1502 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1503 if (perm_identity){ 1504 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1505 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1506 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1507 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1508 } else { 1509 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1510 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1511 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1512 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1513 } 1514 1515 C->assembled = PETSC_TRUE; 1516 C->preallocated = PETSC_TRUE; 1517 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1518 if (sctx.nshift){ 1519 if (shiftnz) { 1520 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1521 } else if (shiftpd) { 1522 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1523 } 1524 } 1525 PetscFunctionReturn(0); 1526 } 1527 1528 #undef __FUNCT__ 1529 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1530 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info) 1531 { 1532 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1533 Mat_SeqSBAIJ *b; 1534 PetscErrorCode ierr; 1535 PetscTruth perm_identity,missing; 1536 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1537 const PetscInt *rip,*riip; 1538 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1539 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1540 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1541 PetscReal fill=info->fill,levels=info->levels; 1542 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1543 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1544 PetscBT lnkbt; 1545 IS iperm; 1546 1547 PetscFunctionBegin; 1548 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1549 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1550 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1551 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1552 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1553 1554 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1555 ui[0] = 0; 1556 1557 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1558 if (!levels && perm_identity) { 1559 1560 for (i=0; i<am; i++) { 1561 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1562 } 1563 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1564 cols = uj; 1565 for (i=0; i<am; i++) { 1566 aj = a->j + a->diag[i]; 1567 ncols = ui[i+1] - ui[i]; 1568 for (j=0; j<ncols; j++) *cols++ = *aj++; 1569 } 1570 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1571 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1572 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1573 1574 /* initialization */ 1575 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1576 1577 /* jl: linked list for storing indices of the pivot rows 1578 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1579 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1580 il = jl + am; 1581 uj_ptr = (PetscInt**)(il + am); 1582 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1583 for (i=0; i<am; i++){ 1584 jl[i] = am; il[i] = 0; 1585 } 1586 1587 /* create and initialize a linked list for storing column indices of the active row k */ 1588 nlnk = am + 1; 1589 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1590 1591 /* initial FreeSpace size is fill*(ai[am]+1) */ 1592 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1593 current_space = free_space; 1594 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1595 current_space_lvl = free_space_lvl; 1596 1597 for (k=0; k<am; k++){ /* for each active row k */ 1598 /* initialize lnk by the column indices of row rip[k] of A */ 1599 nzk = 0; 1600 ncols = ai[rip[k]+1] - ai[rip[k]]; 1601 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1602 ncols_upper = 0; 1603 for (j=0; j<ncols; j++){ 1604 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1605 if (riip[i] >= k){ /* only take upper triangular entry */ 1606 ajtmp[ncols_upper] = i; 1607 ncols_upper++; 1608 } 1609 } 1610 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1611 nzk += nlnk; 1612 1613 /* update lnk by computing fill-in for each pivot row to be merged in */ 1614 prow = jl[k]; /* 1st pivot row */ 1615 1616 while (prow < k){ 1617 nextprow = jl[prow]; 1618 1619 /* merge prow into k-th row */ 1620 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1621 jmax = ui[prow+1]; 1622 ncols = jmax-jmin; 1623 i = jmin - ui[prow]; 1624 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1625 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1626 j = *(uj - 1); 1627 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1628 nzk += nlnk; 1629 1630 /* update il and jl for prow */ 1631 if (jmin < jmax){ 1632 il[prow] = jmin; 1633 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1634 } 1635 prow = nextprow; 1636 } 1637 1638 /* if free space is not available, make more free space */ 1639 if (current_space->local_remaining<nzk) { 1640 i = am - k + 1; /* num of unfactored rows */ 1641 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1642 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1643 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1644 reallocs++; 1645 } 1646 1647 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1648 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1649 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1650 1651 /* add the k-th row into il and jl */ 1652 if (nzk > 1){ 1653 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1654 jl[k] = jl[i]; jl[i] = k; 1655 il[k] = ui[k] + 1; 1656 } 1657 uj_ptr[k] = current_space->array; 1658 uj_lvl_ptr[k] = current_space_lvl->array; 1659 1660 current_space->array += nzk; 1661 current_space->local_used += nzk; 1662 current_space->local_remaining -= nzk; 1663 1664 current_space_lvl->array += nzk; 1665 current_space_lvl->local_used += nzk; 1666 current_space_lvl->local_remaining -= nzk; 1667 1668 ui[k+1] = ui[k] + nzk; 1669 } 1670 1671 #if defined(PETSC_USE_INFO) 1672 if (ai[am] != 0) { 1673 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1674 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1675 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1676 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1677 } else { 1678 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1679 } 1680 #endif 1681 1682 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1683 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1684 ierr = PetscFree(jl);CHKERRQ(ierr); 1685 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1686 1687 /* destroy list of free space and other temporary array(s) */ 1688 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1689 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1690 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1691 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1692 1693 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1694 1695 /* put together the new matrix in MATSEQSBAIJ format */ 1696 1697 b = (Mat_SeqSBAIJ*)(fact)->data; 1698 b->singlemalloc = PETSC_FALSE; 1699 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1700 b->j = uj; 1701 b->i = ui; 1702 b->diag = 0; 1703 b->ilen = 0; 1704 b->imax = 0; 1705 b->row = perm; 1706 b->col = perm; 1707 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1708 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1709 b->icol = iperm; 1710 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1711 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1712 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1713 b->maxnz = b->nz = ui[am]; 1714 b->free_a = PETSC_TRUE; 1715 b->free_ij = PETSC_TRUE; 1716 1717 (fact)->info.factor_mallocs = reallocs; 1718 (fact)->info.fill_ratio_given = fill; 1719 if (ai[am] != 0) { 1720 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1721 } else { 1722 (fact)->info.fill_ratio_needed = 0.0; 1723 } 1724 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1725 PetscFunctionReturn(0); 1726 } 1727 1728 #undef __FUNCT__ 1729 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1730 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,MatFactorInfo *info) 1731 { 1732 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1733 Mat_SeqSBAIJ *b; 1734 PetscErrorCode ierr; 1735 PetscTruth perm_identity; 1736 PetscReal fill = info->fill; 1737 const PetscInt *rip,*riip; 1738 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1739 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1740 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1741 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1742 PetscBT lnkbt; 1743 IS iperm; 1744 1745 PetscFunctionBegin; 1746 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1747 /* check whether perm is the identity mapping */ 1748 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1749 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1750 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1751 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1752 1753 /* initialization */ 1754 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1755 ui[0] = 0; 1756 1757 /* jl: linked list for storing indices of the pivot rows 1758 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1759 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1760 il = jl + am; 1761 cols = il + am; 1762 ui_ptr = (PetscInt**)(cols + am); 1763 for (i=0; i<am; i++){ 1764 jl[i] = am; il[i] = 0; 1765 } 1766 1767 /* create and initialize a linked list for storing column indices of the active row k */ 1768 nlnk = am + 1; 1769 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1770 1771 /* initial FreeSpace size is fill*(ai[am]+1) */ 1772 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1773 current_space = free_space; 1774 1775 for (k=0; k<am; k++){ /* for each active row k */ 1776 /* initialize lnk by the column indices of row rip[k] of A */ 1777 nzk = 0; 1778 ncols = ai[rip[k]+1] - ai[rip[k]]; 1779 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1780 ncols_upper = 0; 1781 for (j=0; j<ncols; j++){ 1782 i = riip[*(aj + ai[rip[k]] + j)]; 1783 if (i >= k){ /* only take upper triangular entry */ 1784 cols[ncols_upper] = i; 1785 ncols_upper++; 1786 } 1787 } 1788 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1789 nzk += nlnk; 1790 1791 /* update lnk by computing fill-in for each pivot row to be merged in */ 1792 prow = jl[k]; /* 1st pivot row */ 1793 1794 while (prow < k){ 1795 nextprow = jl[prow]; 1796 /* merge prow into k-th row */ 1797 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1798 jmax = ui[prow+1]; 1799 ncols = jmax-jmin; 1800 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1801 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1802 nzk += nlnk; 1803 1804 /* update il and jl for prow */ 1805 if (jmin < jmax){ 1806 il[prow] = jmin; 1807 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1808 } 1809 prow = nextprow; 1810 } 1811 1812 /* if free space is not available, make more free space */ 1813 if (current_space->local_remaining<nzk) { 1814 i = am - k + 1; /* num of unfactored rows */ 1815 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1816 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1817 reallocs++; 1818 } 1819 1820 /* copy data into free space, then initialize lnk */ 1821 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1822 1823 /* add the k-th row into il and jl */ 1824 if (nzk-1 > 0){ 1825 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1826 jl[k] = jl[i]; jl[i] = k; 1827 il[k] = ui[k] + 1; 1828 } 1829 ui_ptr[k] = current_space->array; 1830 current_space->array += nzk; 1831 current_space->local_used += nzk; 1832 current_space->local_remaining -= nzk; 1833 1834 ui[k+1] = ui[k] + nzk; 1835 } 1836 1837 #if defined(PETSC_USE_INFO) 1838 if (ai[am] != 0) { 1839 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1840 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1841 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1842 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1843 } else { 1844 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1845 } 1846 #endif 1847 1848 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1849 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1850 ierr = PetscFree(jl);CHKERRQ(ierr); 1851 1852 /* destroy list of free space and other temporary array(s) */ 1853 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1854 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1855 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1856 1857 /* put together the new matrix in MATSEQSBAIJ format */ 1858 1859 b = (Mat_SeqSBAIJ*)(fact)->data; 1860 b->singlemalloc = PETSC_FALSE; 1861 b->free_a = PETSC_TRUE; 1862 b->free_ij = PETSC_TRUE; 1863 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1864 b->j = uj; 1865 b->i = ui; 1866 b->diag = 0; 1867 b->ilen = 0; 1868 b->imax = 0; 1869 b->row = perm; 1870 b->col = perm; 1871 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1872 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1873 b->icol = iperm; 1874 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1875 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1876 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1877 b->maxnz = b->nz = ui[am]; 1878 1879 (fact)->info.factor_mallocs = reallocs; 1880 (fact)->info.fill_ratio_given = fill; 1881 if (ai[am] != 0) { 1882 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1883 } else { 1884 (fact)->info.fill_ratio_needed = 0.0; 1885 } 1886 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1887 PetscFunctionReturn(0); 1888 } 1889