1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 252 253 PetscFunctionReturn(0); 254 #endif 255 } 256 257 EXTERN_C_BEGIN 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 261 { 262 PetscFunctionBegin; 263 *flg = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 EXTERN_C_END 267 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 272 { 273 PetscInt n = A->rmap->n; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 278 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 279 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 280 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 281 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 282 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 283 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 284 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 285 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 286 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 287 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 288 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 289 (*B)->factor = ftype; 290 PetscFunctionReturn(0); 291 } 292 EXTERN_C_END 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 296 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 297 { 298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 299 IS isicol; 300 PetscErrorCode ierr; 301 const PetscInt *r,*ic; 302 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (B)->factor = MAT_FACTOR_LU; 426 (B)->info.factor_mallocs = reallocs; 427 (B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (B)->info.fill_ratio_needed = 0.0; 433 } 434 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (B)->ops->solve = MatSolve_SeqAIJ; 436 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 472 { 473 Mat C=B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 const PetscInt *r,*ic,*ics; 478 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 479 PetscInt *ajtmp,*bjtmp,nz,row; 480 PetscInt *diag_offset = b->diag,diag,*pj; 481 PetscScalar *rtmp,*pc,multiplier,*rtmps; 482 MatScalar *v,*pv; 483 PetscScalar d; 484 PetscReal rs; 485 LUShift_Ctx sctx; 486 PetscInt newshift,*ddiag; 487 488 PetscFunctionBegin; 489 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 490 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 491 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 492 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 493 rtmps = rtmp; ics = ic; 494 495 sctx.shift_top = 0; 496 sctx.nshift_max = 0; 497 sctx.shift_lo = 0; 498 sctx.shift_hi = 0; 499 sctx.shift_fraction = 0; 500 501 /* if both shift schemes are chosen by user, only use info->shiftpd */ 502 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 503 PetscInt *aai = a->i; 504 ddiag = a->diag; 505 sctx.shift_top = 0; 506 for (i=0; i<n; i++) { 507 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 508 d = (a->a)[ddiag[i]]; 509 rs = -PetscAbsScalar(d) - PetscRealPart(d); 510 v = a->a+aai[i]; 511 nz = aai[i+1] - aai[i]; 512 for (j=0; j<nz; j++) 513 rs += PetscAbsScalar(v[j]); 514 if (rs>sctx.shift_top) sctx.shift_top = rs; 515 } 516 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 517 sctx.shift_top *= 1.1; 518 sctx.nshift_max = 5; 519 sctx.shift_lo = 0.; 520 sctx.shift_hi = 1.; 521 } 522 523 sctx.shift_amount = 0; 524 sctx.nshift = 0; 525 do { 526 sctx.lushift = PETSC_FALSE; 527 for (i=0; i<n; i++){ 528 nz = bi[i+1] - bi[i]; 529 bjtmp = bj + bi[i]; 530 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 531 532 /* load in initial (unfactored row) */ 533 nz = a->i[r[i]+1] - a->i[r[i]]; 534 ajtmp = a->j + a->i[r[i]]; 535 v = a->a + a->i[r[i]]; 536 for (j=0; j<nz; j++) { 537 rtmp[ics[ajtmp[j]]] = v[j]; 538 } 539 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 540 541 row = *bjtmp++; 542 while (row < i) { 543 pc = rtmp + row; 544 if (*pc != 0.0) { 545 pv = b->a + diag_offset[row]; 546 pj = b->j + diag_offset[row] + 1; 547 multiplier = *pc / *pv++; 548 *pc = multiplier; 549 nz = bi[row+1] - diag_offset[row] - 1; 550 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 551 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 552 } 553 row = *bjtmp++; 554 } 555 /* finished row so stick it into b->a */ 556 pv = b->a + bi[i] ; 557 pj = b->j + bi[i] ; 558 nz = bi[i+1] - bi[i]; 559 diag = diag_offset[i] - bi[i]; 560 rs = 0.0; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmps[pj[j]]; 563 if (j != diag) rs += PetscAbsScalar(pv[j]); 564 } 565 566 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 567 sctx.rs = rs; 568 sctx.pv = pv[diag]; 569 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 570 if (newshift == 1) break; 571 } 572 573 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 574 /* 575 * if no shift in this attempt & shifting & started shifting & can refine, 576 * then try lower shift 577 */ 578 sctx.shift_hi = sctx.shift_fraction; 579 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 580 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 581 sctx.lushift = PETSC_TRUE; 582 sctx.nshift++; 583 } 584 } while (sctx.lushift); 585 586 /* invert diagonal entries for simplier triangular solves */ 587 for (i=0; i<n; i++) { 588 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 589 } 590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 593 if (b->inode.use) { 594 C->ops->solve = MatSolve_Inode; 595 } else { 596 C->ops->solve = MatSolve_SeqAIJ; 597 } 598 C->ops->solveadd = MatSolveAdd_SeqAIJ; 599 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 600 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 601 C->ops->matsolve = MatMatSolve_SeqAIJ; 602 C->assembled = PETSC_TRUE; 603 C->preallocated = PETSC_TRUE; 604 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 605 if (sctx.nshift){ 606 if (info->shiftpd) { 607 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 608 } else if (info->shiftnz) { 609 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 610 } 611 } 612 PetscFunctionReturn(0); 613 } 614 615 /* 616 This routine implements inplace ILU(0) with row or/and column permutations. 617 Input: 618 A - original matrix 619 Output; 620 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 621 a->j (col index) is permuted by the inverse of colperm, then sorted 622 a->a reordered accordingly with a->j 623 a->diag (ptr to diagonal elements) is updated. 624 */ 625 #undef __FUNCT__ 626 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 627 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 628 { 629 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 630 IS isrow = a->row,isicol = a->icol; 631 PetscErrorCode ierr; 632 const PetscInt *r,*ic,*ics; 633 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 634 PetscInt *ajtmp,nz,row; 635 PetscInt *diag = a->diag,nbdiag,*pj; 636 PetscScalar *rtmp,*pc,multiplier,d; 637 MatScalar *v,*pv; 638 PetscReal rs; 639 LUShift_Ctx sctx; 640 PetscInt newshift; 641 642 PetscFunctionBegin; 643 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 644 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 645 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 646 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 647 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 648 ics = ic; 649 650 sctx.shift_top = 0; 651 sctx.nshift_max = 0; 652 sctx.shift_lo = 0; 653 sctx.shift_hi = 0; 654 sctx.shift_fraction = 0; 655 656 /* if both shift schemes are chosen by user, only use info->shiftpd */ 657 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 658 sctx.shift_top = 0; 659 for (i=0; i<n; i++) { 660 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 661 d = (a->a)[diag[i]]; 662 rs = -PetscAbsScalar(d) - PetscRealPart(d); 663 v = a->a+ai[i]; 664 nz = ai[i+1] - ai[i]; 665 for (j=0; j<nz; j++) 666 rs += PetscAbsScalar(v[j]); 667 if (rs>sctx.shift_top) sctx.shift_top = rs; 668 } 669 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 670 sctx.shift_top *= 1.1; 671 sctx.nshift_max = 5; 672 sctx.shift_lo = 0.; 673 sctx.shift_hi = 1.; 674 } 675 676 sctx.shift_amount = 0; 677 sctx.nshift = 0; 678 do { 679 sctx.lushift = PETSC_FALSE; 680 for (i=0; i<n; i++){ 681 /* load in initial unfactored row */ 682 nz = ai[r[i]+1] - ai[r[i]]; 683 ajtmp = aj + ai[r[i]]; 684 v = a->a + ai[r[i]]; 685 /* sort permuted ajtmp and values v accordingly */ 686 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 687 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 688 689 diag[r[i]] = ai[r[i]]; 690 for (j=0; j<nz; j++) { 691 rtmp[ajtmp[j]] = v[j]; 692 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 693 } 694 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 695 696 row = *ajtmp++; 697 while (row < i) { 698 pc = rtmp + row; 699 if (*pc != 0.0) { 700 pv = a->a + diag[r[row]]; 701 pj = aj + diag[r[row]] + 1; 702 703 multiplier = *pc / *pv++; 704 *pc = multiplier; 705 nz = ai[r[row]+1] - diag[r[row]] - 1; 706 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 707 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 708 } 709 row = *ajtmp++; 710 } 711 /* finished row so overwrite it onto a->a */ 712 pv = a->a + ai[r[i]] ; 713 pj = aj + ai[r[i]] ; 714 nz = ai[r[i]+1] - ai[r[i]]; 715 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 716 717 rs = 0.0; 718 for (j=0; j<nz; j++) { 719 pv[j] = rtmp[pj[j]]; 720 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 721 } 722 723 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 724 sctx.rs = rs; 725 sctx.pv = pv[nbdiag]; 726 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 727 if (newshift == 1) break; 728 } 729 730 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 731 /* 732 * if no shift in this attempt & shifting & started shifting & can refine, 733 * then try lower shift 734 */ 735 sctx.shift_hi = sctx.shift_fraction; 736 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 737 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 738 sctx.lushift = PETSC_TRUE; 739 sctx.nshift++; 740 } 741 } while (sctx.lushift); 742 743 /* invert diagonal entries for simplier triangular solves */ 744 for (i=0; i<n; i++) { 745 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 746 } 747 748 ierr = PetscFree(rtmp);CHKERRQ(ierr); 749 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 750 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 751 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 752 A->ops->solveadd = MatSolveAdd_SeqAIJ; 753 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 754 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 755 A->assembled = PETSC_TRUE; 756 A->preallocated = PETSC_TRUE; 757 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 758 if (sctx.nshift){ 759 if (info->shiftpd) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 761 } else if (info->shiftnz) { 762 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 763 } 764 } 765 PetscFunctionReturn(0); 766 } 767 768 /* ----------------------------------------------------------- */ 769 #undef __FUNCT__ 770 #define __FUNCT__ "MatLUFactor_SeqAIJ" 771 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 772 { 773 PetscErrorCode ierr; 774 Mat C; 775 776 PetscFunctionBegin; 777 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 778 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 779 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 780 A->ops->solve = C->ops->solve; 781 A->ops->solvetranspose = C->ops->solvetranspose; 782 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 783 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 784 PetscFunctionReturn(0); 785 } 786 /* ----------------------------------------------------------- */ 787 #undef __FUNCT__ 788 #define __FUNCT__ "MatSolve_SeqAIJ" 789 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 790 { 791 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 792 IS iscol = a->col,isrow = a->row; 793 PetscErrorCode ierr; 794 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 795 PetscInt nz; 796 const PetscInt *rout,*cout,*r,*c; 797 PetscScalar *x,*tmp,*tmps,sum; 798 const PetscScalar *b; 799 const MatScalar *aa = a->a,*v; 800 801 PetscFunctionBegin; 802 if (!n) PetscFunctionReturn(0); 803 804 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 805 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 806 tmp = a->solve_work; 807 808 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 809 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 810 811 /* forward solve the lower triangular */ 812 tmp[0] = b[*r++]; 813 tmps = tmp; 814 for (i=1; i<n; i++) { 815 v = aa + ai[i] ; 816 vi = aj + ai[i] ; 817 nz = a->diag[i] - ai[i]; 818 sum = b[*r++]; 819 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 820 tmp[i] = sum; 821 } 822 823 /* backward solve the upper triangular */ 824 for (i=n-1; i>=0; i--){ 825 v = aa + a->diag[i] + 1; 826 vi = aj + a->diag[i] + 1; 827 nz = ai[i+1] - a->diag[i] - 1; 828 sum = tmp[i]; 829 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 830 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 831 } 832 833 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 834 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 835 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 836 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 837 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 838 PetscFunctionReturn(0); 839 } 840 841 #undef __FUNCT__ 842 #define __FUNCT__ "MatMatSolve_SeqAIJ" 843 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 844 { 845 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 846 IS iscol = a->col,isrow = a->row; 847 PetscErrorCode ierr; 848 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 849 PetscInt nz,neq; 850 const PetscInt *rout,*cout,*r,*c; 851 PetscScalar *x,*b,*tmp,*tmps,sum; 852 const MatScalar *aa = a->a,*v; 853 PetscTruth bisdense,xisdense; 854 855 PetscFunctionBegin; 856 if (!n) PetscFunctionReturn(0); 857 858 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 859 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 860 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 861 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 862 863 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 864 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 865 866 tmp = a->solve_work; 867 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 868 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 869 870 for (neq=0; neq<B->cmap->n; neq++){ 871 /* forward solve the lower triangular */ 872 tmp[0] = b[r[0]]; 873 tmps = tmp; 874 for (i=1; i<n; i++) { 875 v = aa + ai[i] ; 876 vi = aj + ai[i] ; 877 nz = a->diag[i] - ai[i]; 878 sum = b[r[i]]; 879 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 880 tmp[i] = sum; 881 } 882 /* backward solve the upper triangular */ 883 for (i=n-1; i>=0; i--){ 884 v = aa + a->diag[i] + 1; 885 vi = aj + a->diag[i] + 1; 886 nz = ai[i+1] - a->diag[i] - 1; 887 sum = tmp[i]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 890 } 891 892 b += n; 893 x += n; 894 } 895 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 896 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 897 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 898 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 899 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 905 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 906 { 907 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 908 IS iscol = a->col,isrow = a->row; 909 PetscErrorCode ierr; 910 const PetscInt *r,*c,*rout,*cout; 911 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 912 PetscInt nz,row; 913 PetscScalar *x,*b,*tmp,*tmps,sum; 914 const MatScalar *aa = a->a,*v; 915 916 PetscFunctionBegin; 917 if (!n) PetscFunctionReturn(0); 918 919 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 920 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 921 tmp = a->solve_work; 922 923 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 924 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 925 926 /* forward solve the lower triangular */ 927 tmp[0] = b[*r++]; 928 tmps = tmp; 929 for (row=1; row<n; row++) { 930 i = rout[row]; /* permuted row */ 931 v = aa + ai[i] ; 932 vi = aj + ai[i] ; 933 nz = a->diag[i] - ai[i]; 934 sum = b[*r++]; 935 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 936 tmp[row] = sum; 937 } 938 939 /* backward solve the upper triangular */ 940 for (row=n-1; row>=0; row--){ 941 i = rout[row]; /* permuted row */ 942 v = aa + a->diag[i] + 1; 943 vi = aj + a->diag[i] + 1; 944 nz = ai[i+1] - a->diag[i] - 1; 945 sum = tmp[row]; 946 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 947 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 948 } 949 950 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 951 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 952 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 953 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 954 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 955 PetscFunctionReturn(0); 956 } 957 958 /* ----------------------------------------------------------- */ 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 961 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 962 { 963 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 964 PetscErrorCode ierr; 965 PetscInt n = A->rmap->n; 966 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 967 PetscScalar *x; 968 const PetscScalar *b; 969 const MatScalar *aa = a->a; 970 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 971 PetscInt adiag_i,i,nz,ai_i; 972 const MatScalar *v; 973 PetscScalar sum; 974 #endif 975 976 PetscFunctionBegin; 977 if (!n) PetscFunctionReturn(0); 978 979 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 980 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 981 982 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 983 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 984 #else 985 /* forward solve the lower triangular */ 986 x[0] = b[0]; 987 for (i=1; i<n; i++) { 988 ai_i = ai[i]; 989 v = aa + ai_i; 990 vi = aj + ai_i; 991 nz = adiag[i] - ai_i; 992 sum = b[i]; 993 while (nz--) sum -= *v++ * x[*vi++]; 994 x[i] = sum; 995 } 996 997 /* backward solve the upper triangular */ 998 for (i=n-1; i>=0; i--){ 999 adiag_i = adiag[i]; 1000 v = aa + adiag_i + 1; 1001 vi = aj + adiag_i + 1; 1002 nz = ai[i+1] - adiag_i - 1; 1003 sum = x[i]; 1004 while (nz--) sum -= *v++ * x[*vi++]; 1005 x[i] = sum*aa[adiag_i]; 1006 } 1007 #endif 1008 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1009 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1010 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1011 PetscFunctionReturn(0); 1012 } 1013 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1016 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1017 { 1018 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1019 IS iscol = a->col,isrow = a->row; 1020 PetscErrorCode ierr; 1021 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1022 PetscInt nz; 1023 const PetscInt *rout,*cout,*r,*c; 1024 PetscScalar *x,*b,*tmp,sum; 1025 const MatScalar *aa = a->a,*v; 1026 1027 PetscFunctionBegin; 1028 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1029 1030 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1031 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1032 tmp = a->solve_work; 1033 1034 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1035 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1036 1037 /* forward solve the lower triangular */ 1038 tmp[0] = b[*r++]; 1039 for (i=1; i<n; i++) { 1040 v = aa + ai[i] ; 1041 vi = aj + ai[i] ; 1042 nz = a->diag[i] - ai[i]; 1043 sum = b[*r++]; 1044 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1045 tmp[i] = sum; 1046 } 1047 1048 /* backward solve the upper triangular */ 1049 for (i=n-1; i>=0; i--){ 1050 v = aa + a->diag[i] + 1; 1051 vi = aj + a->diag[i] + 1; 1052 nz = ai[i+1] - a->diag[i] - 1; 1053 sum = tmp[i]; 1054 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1055 tmp[i] = sum*aa[a->diag[i]]; 1056 x[*c--] += tmp[i]; 1057 } 1058 1059 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1060 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1062 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1063 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1064 1065 PetscFunctionReturn(0); 1066 } 1067 /* -------------------------------------------------------------------*/ 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1070 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1071 { 1072 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1073 IS iscol = a->col,isrow = a->row; 1074 PetscErrorCode ierr; 1075 const PetscInt *rout,*cout,*r,*c; 1076 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1077 PetscInt nz,*diag = a->diag; 1078 PetscScalar *x,*b,*tmp,s1; 1079 const MatScalar *aa = a->a,*v; 1080 1081 PetscFunctionBegin; 1082 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1083 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1084 tmp = a->solve_work; 1085 1086 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1087 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1088 1089 /* copy the b into temp work space according to permutation */ 1090 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1091 1092 /* forward solve the U^T */ 1093 for (i=0; i<n; i++) { 1094 v = aa + diag[i] ; 1095 vi = aj + diag[i] + 1; 1096 nz = ai[i+1] - diag[i] - 1; 1097 s1 = tmp[i]; 1098 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1099 while (nz--) { 1100 tmp[*vi++ ] -= (*v++)*s1; 1101 } 1102 tmp[i] = s1; 1103 } 1104 1105 /* backward solve the L^T */ 1106 for (i=n-1; i>=0; i--){ 1107 v = aa + diag[i] - 1 ; 1108 vi = aj + diag[i] - 1 ; 1109 nz = diag[i] - ai[i]; 1110 s1 = tmp[i]; 1111 while (nz--) { 1112 tmp[*vi-- ] -= (*v--)*s1; 1113 } 1114 } 1115 1116 /* copy tmp into x according to permutation */ 1117 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1118 1119 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1120 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1121 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1122 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1123 1124 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 #undef __FUNCT__ 1129 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1130 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1131 { 1132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1133 IS iscol = a->col,isrow = a->row; 1134 PetscErrorCode ierr; 1135 const PetscInt *r,*c,*rout,*cout; 1136 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1137 PetscInt nz,*diag = a->diag; 1138 PetscScalar *x,*b,*tmp; 1139 const MatScalar *aa = a->a,*v; 1140 1141 PetscFunctionBegin; 1142 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1143 1144 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1146 tmp = a->solve_work; 1147 1148 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1149 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1150 1151 /* copy the b into temp work space according to permutation */ 1152 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1153 1154 /* forward solve the U^T */ 1155 for (i=0; i<n; i++) { 1156 v = aa + diag[i] ; 1157 vi = aj + diag[i] + 1; 1158 nz = ai[i+1] - diag[i] - 1; 1159 tmp[i] *= *v++; 1160 while (nz--) { 1161 tmp[*vi++ ] -= (*v++)*tmp[i]; 1162 } 1163 } 1164 1165 /* backward solve the L^T */ 1166 for (i=n-1; i>=0; i--){ 1167 v = aa + diag[i] - 1 ; 1168 vi = aj + diag[i] - 1 ; 1169 nz = diag[i] - ai[i]; 1170 while (nz--) { 1171 tmp[*vi-- ] -= (*v--)*tmp[i]; 1172 } 1173 } 1174 1175 /* copy tmp into x according to permutation */ 1176 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1177 1178 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1179 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1180 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1181 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1182 1183 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 /* ----------------------------------------------------------------*/ 1187 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1188 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1189 1190 #undef __FUNCT__ 1191 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1192 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1193 { 1194 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1195 IS isicol; 1196 PetscErrorCode ierr; 1197 const PetscInt *r,*ic; 1198 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1199 PetscInt *bi,*cols,nnz,*cols_lvl; 1200 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1201 PetscInt i,levels,diagonal_fill; 1202 PetscTruth col_identity,row_identity; 1203 PetscReal f; 1204 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1205 PetscBT lnkbt; 1206 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1207 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1208 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1209 PetscTruth missing; 1210 1211 PetscFunctionBegin; 1212 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1213 f = info->fill; 1214 levels = (PetscInt)info->levels; 1215 diagonal_fill = (PetscInt)info->diagonal_fill; 1216 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1217 1218 /* special case that simply copies fill pattern */ 1219 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1220 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1221 if (!levels && row_identity && col_identity) { 1222 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1223 fact->factor = MAT_FACTOR_ILU; 1224 (fact)->info.factor_mallocs = 0; 1225 (fact)->info.fill_ratio_given = info->fill; 1226 (fact)->info.fill_ratio_needed = 1.0; 1227 b = (Mat_SeqAIJ*)(fact)->data; 1228 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1229 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1230 b->row = isrow; 1231 b->col = iscol; 1232 b->icol = isicol; 1233 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1234 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1235 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1236 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1237 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1242 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1243 1244 /* get new row pointers */ 1245 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1246 bi[0] = 0; 1247 /* bdiag is location of diagonal in factor */ 1248 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1249 bdiag[0] = 0; 1250 1251 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1252 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1253 1254 /* create a linked list for storing column indices of the active row */ 1255 nlnk = n + 1; 1256 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1257 1258 /* initial FreeSpace size is f*(ai[n]+1) */ 1259 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1260 current_space = free_space; 1261 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1262 current_space_lvl = free_space_lvl; 1263 1264 for (i=0; i<n; i++) { 1265 nzi = 0; 1266 /* copy current row into linked list */ 1267 nnz = ai[r[i]+1] - ai[r[i]]; 1268 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1269 cols = aj + ai[r[i]]; 1270 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1271 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1272 nzi += nlnk; 1273 1274 /* make sure diagonal entry is included */ 1275 if (diagonal_fill && lnk[i] == -1) { 1276 fm = n; 1277 while (lnk[fm] < i) fm = lnk[fm]; 1278 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1279 lnk[fm] = i; 1280 lnk_lvl[i] = 0; 1281 nzi++; dcount++; 1282 } 1283 1284 /* add pivot rows into the active row */ 1285 nzbd = 0; 1286 prow = lnk[n]; 1287 while (prow < i) { 1288 nnz = bdiag[prow]; 1289 cols = bj_ptr[prow] + nnz + 1; 1290 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1291 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1292 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1293 nzi += nlnk; 1294 prow = lnk[prow]; 1295 nzbd++; 1296 } 1297 bdiag[i] = nzbd; 1298 bi[i+1] = bi[i] + nzi; 1299 1300 /* if free space is not available, make more free space */ 1301 if (current_space->local_remaining<nzi) { 1302 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1303 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1304 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1305 reallocs++; 1306 } 1307 1308 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1309 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1310 bj_ptr[i] = current_space->array; 1311 bjlvl_ptr[i] = current_space_lvl->array; 1312 1313 /* make sure the active row i has diagonal entry */ 1314 if (*(bj_ptr[i]+bdiag[i]) != i) { 1315 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1316 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1317 } 1318 1319 current_space->array += nzi; 1320 current_space->local_used += nzi; 1321 current_space->local_remaining -= nzi; 1322 current_space_lvl->array += nzi; 1323 current_space_lvl->local_used += nzi; 1324 current_space_lvl->local_remaining -= nzi; 1325 } 1326 1327 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1328 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1329 1330 /* destroy list of free space and other temporary arrays */ 1331 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1332 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1333 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1334 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1335 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1336 1337 #if defined(PETSC_USE_INFO) 1338 { 1339 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1340 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1341 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1342 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1343 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1344 if (diagonal_fill) { 1345 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1346 } 1347 } 1348 #endif 1349 1350 /* put together the new matrix */ 1351 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1352 b = (Mat_SeqAIJ*)(fact)->data; 1353 b->free_a = PETSC_TRUE; 1354 b->free_ij = PETSC_TRUE; 1355 b->singlemalloc = PETSC_FALSE; 1356 len = (bi[n] )*sizeof(PetscScalar); 1357 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1358 b->j = bj; 1359 b->i = bi; 1360 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1361 b->diag = bdiag; 1362 b->ilen = 0; 1363 b->imax = 0; 1364 b->row = isrow; 1365 b->col = iscol; 1366 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1367 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1368 b->icol = isicol; 1369 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1370 /* In b structure: Free imax, ilen, old a, old j. 1371 Allocate bdiag, solve_work, new a, new j */ 1372 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1373 b->maxnz = b->nz = bi[n] ; 1374 (fact)->info.factor_mallocs = reallocs; 1375 (fact)->info.fill_ratio_given = f; 1376 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1377 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1378 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1379 PetscFunctionReturn(0); 1380 } 1381 1382 #include "src/mat/impls/sbaij/seq/sbaij.h" 1383 #undef __FUNCT__ 1384 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1385 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1386 { 1387 Mat C = B; 1388 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1389 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1390 IS ip=b->row,iip = b->icol; 1391 PetscErrorCode ierr; 1392 const PetscInt *rip,*riip; 1393 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1394 PetscInt *ai=a->i,*aj=a->j; 1395 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1396 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1397 PetscReal zeropivot,rs,shiftnz; 1398 PetscReal shiftpd; 1399 ChShift_Ctx sctx; 1400 PetscInt newshift; 1401 PetscTruth perm_identity; 1402 1403 PetscFunctionBegin; 1404 1405 shiftnz = info->shiftnz; 1406 shiftpd = info->shiftpd; 1407 zeropivot = info->zeropivot; 1408 1409 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1410 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1411 1412 /* initialization */ 1413 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1414 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1415 jl = il + mbs; 1416 rtmp = (MatScalar*)(jl + mbs); 1417 1418 sctx.shift_amount = 0; 1419 sctx.nshift = 0; 1420 do { 1421 sctx.chshift = PETSC_FALSE; 1422 for (i=0; i<mbs; i++) { 1423 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1424 } 1425 1426 for (k = 0; k<mbs; k++){ 1427 bval = ba + bi[k]; 1428 /* initialize k-th row by the perm[k]-th row of A */ 1429 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1430 for (j = jmin; j < jmax; j++){ 1431 col = riip[aj[j]]; 1432 if (col >= k){ /* only take upper triangular entry */ 1433 rtmp[col] = aa[j]; 1434 *bval++ = 0.0; /* for in-place factorization */ 1435 } 1436 } 1437 /* shift the diagonal of the matrix */ 1438 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1439 1440 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1441 dk = rtmp[k]; 1442 i = jl[k]; /* first row to be added to k_th row */ 1443 1444 while (i < k){ 1445 nexti = jl[i]; /* next row to be added to k_th row */ 1446 1447 /* compute multiplier, update diag(k) and U(i,k) */ 1448 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1449 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1450 dk += uikdi*ba[ili]; 1451 ba[ili] = uikdi; /* -U(i,k) */ 1452 1453 /* add multiple of row i to k-th row */ 1454 jmin = ili + 1; jmax = bi[i+1]; 1455 if (jmin < jmax){ 1456 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1457 /* update il and jl for row i */ 1458 il[i] = jmin; 1459 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1460 } 1461 i = nexti; 1462 } 1463 1464 /* shift the diagonals when zero pivot is detected */ 1465 /* compute rs=sum of abs(off-diagonal) */ 1466 rs = 0.0; 1467 jmin = bi[k]+1; 1468 nz = bi[k+1] - jmin; 1469 bcol = bj + jmin; 1470 while (nz--){ 1471 rs += PetscAbsScalar(rtmp[*bcol]); 1472 bcol++; 1473 } 1474 1475 sctx.rs = rs; 1476 sctx.pv = dk; 1477 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1478 1479 if (newshift == 1) { 1480 if (!sctx.shift_amount) { 1481 sctx.shift_amount = 1e-5; 1482 } 1483 break; 1484 } 1485 1486 /* copy data into U(k,:) */ 1487 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1488 jmin = bi[k]+1; jmax = bi[k+1]; 1489 if (jmin < jmax) { 1490 for (j=jmin; j<jmax; j++){ 1491 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1492 } 1493 /* add the k-th row into il and jl */ 1494 il[k] = jmin; 1495 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1496 } 1497 } 1498 } while (sctx.chshift); 1499 ierr = PetscFree(il);CHKERRQ(ierr); 1500 1501 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1502 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1503 1504 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1505 if (perm_identity){ 1506 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1507 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1508 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1509 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1510 } else { 1511 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1512 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1513 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1514 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1515 } 1516 1517 C->assembled = PETSC_TRUE; 1518 C->preallocated = PETSC_TRUE; 1519 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1520 if (sctx.nshift){ 1521 if (shiftnz) { 1522 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1523 } else if (shiftpd) { 1524 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1525 } 1526 } 1527 PetscFunctionReturn(0); 1528 } 1529 1530 #undef __FUNCT__ 1531 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1532 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1533 { 1534 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1535 Mat_SeqSBAIJ *b; 1536 PetscErrorCode ierr; 1537 PetscTruth perm_identity,missing; 1538 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1539 const PetscInt *rip,*riip; 1540 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1541 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1542 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1543 PetscReal fill=info->fill,levels=info->levels; 1544 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1545 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1546 PetscBT lnkbt; 1547 IS iperm; 1548 1549 PetscFunctionBegin; 1550 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1551 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1552 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1553 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1554 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1555 1556 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1557 ui[0] = 0; 1558 1559 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1560 if (!levels && perm_identity) { 1561 1562 for (i=0; i<am; i++) { 1563 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1564 } 1565 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1566 cols = uj; 1567 for (i=0; i<am; i++) { 1568 aj = a->j + a->diag[i]; 1569 ncols = ui[i+1] - ui[i]; 1570 for (j=0; j<ncols; j++) *cols++ = *aj++; 1571 } 1572 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1573 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1574 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1575 1576 /* initialization */ 1577 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1578 1579 /* jl: linked list for storing indices of the pivot rows 1580 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1581 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1582 il = jl + am; 1583 uj_ptr = (PetscInt**)(il + am); 1584 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1585 for (i=0; i<am; i++){ 1586 jl[i] = am; il[i] = 0; 1587 } 1588 1589 /* create and initialize a linked list for storing column indices of the active row k */ 1590 nlnk = am + 1; 1591 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1592 1593 /* initial FreeSpace size is fill*(ai[am]+1) */ 1594 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1595 current_space = free_space; 1596 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1597 current_space_lvl = free_space_lvl; 1598 1599 for (k=0; k<am; k++){ /* for each active row k */ 1600 /* initialize lnk by the column indices of row rip[k] of A */ 1601 nzk = 0; 1602 ncols = ai[rip[k]+1] - ai[rip[k]]; 1603 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1604 ncols_upper = 0; 1605 for (j=0; j<ncols; j++){ 1606 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1607 if (riip[i] >= k){ /* only take upper triangular entry */ 1608 ajtmp[ncols_upper] = i; 1609 ncols_upper++; 1610 } 1611 } 1612 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1613 nzk += nlnk; 1614 1615 /* update lnk by computing fill-in for each pivot row to be merged in */ 1616 prow = jl[k]; /* 1st pivot row */ 1617 1618 while (prow < k){ 1619 nextprow = jl[prow]; 1620 1621 /* merge prow into k-th row */ 1622 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1623 jmax = ui[prow+1]; 1624 ncols = jmax-jmin; 1625 i = jmin - ui[prow]; 1626 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1627 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1628 j = *(uj - 1); 1629 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1630 nzk += nlnk; 1631 1632 /* update il and jl for prow */ 1633 if (jmin < jmax){ 1634 il[prow] = jmin; 1635 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1636 } 1637 prow = nextprow; 1638 } 1639 1640 /* if free space is not available, make more free space */ 1641 if (current_space->local_remaining<nzk) { 1642 i = am - k + 1; /* num of unfactored rows */ 1643 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1644 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1645 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1646 reallocs++; 1647 } 1648 1649 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1650 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1651 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1652 1653 /* add the k-th row into il and jl */ 1654 if (nzk > 1){ 1655 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1656 jl[k] = jl[i]; jl[i] = k; 1657 il[k] = ui[k] + 1; 1658 } 1659 uj_ptr[k] = current_space->array; 1660 uj_lvl_ptr[k] = current_space_lvl->array; 1661 1662 current_space->array += nzk; 1663 current_space->local_used += nzk; 1664 current_space->local_remaining -= nzk; 1665 1666 current_space_lvl->array += nzk; 1667 current_space_lvl->local_used += nzk; 1668 current_space_lvl->local_remaining -= nzk; 1669 1670 ui[k+1] = ui[k] + nzk; 1671 } 1672 1673 #if defined(PETSC_USE_INFO) 1674 if (ai[am] != 0) { 1675 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1676 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1677 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1678 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1679 } else { 1680 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1681 } 1682 #endif 1683 1684 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1685 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1686 ierr = PetscFree(jl);CHKERRQ(ierr); 1687 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1688 1689 /* destroy list of free space and other temporary array(s) */ 1690 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1691 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1692 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1693 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1694 1695 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1696 1697 /* put together the new matrix in MATSEQSBAIJ format */ 1698 1699 b = (Mat_SeqSBAIJ*)(fact)->data; 1700 b->singlemalloc = PETSC_FALSE; 1701 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1702 b->j = uj; 1703 b->i = ui; 1704 b->diag = 0; 1705 b->ilen = 0; 1706 b->imax = 0; 1707 b->row = perm; 1708 b->col = perm; 1709 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1710 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1711 b->icol = iperm; 1712 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1713 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1714 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1715 b->maxnz = b->nz = ui[am]; 1716 b->free_a = PETSC_TRUE; 1717 b->free_ij = PETSC_TRUE; 1718 1719 (fact)->info.factor_mallocs = reallocs; 1720 (fact)->info.fill_ratio_given = fill; 1721 if (ai[am] != 0) { 1722 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1723 } else { 1724 (fact)->info.fill_ratio_needed = 0.0; 1725 } 1726 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1727 PetscFunctionReturn(0); 1728 } 1729 1730 #undef __FUNCT__ 1731 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1732 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1733 { 1734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1735 Mat_SeqSBAIJ *b; 1736 PetscErrorCode ierr; 1737 PetscTruth perm_identity; 1738 PetscReal fill = info->fill; 1739 const PetscInt *rip,*riip; 1740 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1741 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1742 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1743 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1744 PetscBT lnkbt; 1745 IS iperm; 1746 1747 PetscFunctionBegin; 1748 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1749 /* check whether perm is the identity mapping */ 1750 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1751 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1752 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1753 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1754 1755 /* initialization */ 1756 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1757 ui[0] = 0; 1758 1759 /* jl: linked list for storing indices of the pivot rows 1760 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1761 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1762 il = jl + am; 1763 cols = il + am; 1764 ui_ptr = (PetscInt**)(cols + am); 1765 for (i=0; i<am; i++){ 1766 jl[i] = am; il[i] = 0; 1767 } 1768 1769 /* create and initialize a linked list for storing column indices of the active row k */ 1770 nlnk = am + 1; 1771 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1772 1773 /* initial FreeSpace size is fill*(ai[am]+1) */ 1774 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1775 current_space = free_space; 1776 1777 for (k=0; k<am; k++){ /* for each active row k */ 1778 /* initialize lnk by the column indices of row rip[k] of A */ 1779 nzk = 0; 1780 ncols = ai[rip[k]+1] - ai[rip[k]]; 1781 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1782 ncols_upper = 0; 1783 for (j=0; j<ncols; j++){ 1784 i = riip[*(aj + ai[rip[k]] + j)]; 1785 if (i >= k){ /* only take upper triangular entry */ 1786 cols[ncols_upper] = i; 1787 ncols_upper++; 1788 } 1789 } 1790 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1791 nzk += nlnk; 1792 1793 /* update lnk by computing fill-in for each pivot row to be merged in */ 1794 prow = jl[k]; /* 1st pivot row */ 1795 1796 while (prow < k){ 1797 nextprow = jl[prow]; 1798 /* merge prow into k-th row */ 1799 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1800 jmax = ui[prow+1]; 1801 ncols = jmax-jmin; 1802 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1803 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1804 nzk += nlnk; 1805 1806 /* update il and jl for prow */ 1807 if (jmin < jmax){ 1808 il[prow] = jmin; 1809 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1810 } 1811 prow = nextprow; 1812 } 1813 1814 /* if free space is not available, make more free space */ 1815 if (current_space->local_remaining<nzk) { 1816 i = am - k + 1; /* num of unfactored rows */ 1817 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1818 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1819 reallocs++; 1820 } 1821 1822 /* copy data into free space, then initialize lnk */ 1823 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1824 1825 /* add the k-th row into il and jl */ 1826 if (nzk-1 > 0){ 1827 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1828 jl[k] = jl[i]; jl[i] = k; 1829 il[k] = ui[k] + 1; 1830 } 1831 ui_ptr[k] = current_space->array; 1832 current_space->array += nzk; 1833 current_space->local_used += nzk; 1834 current_space->local_remaining -= nzk; 1835 1836 ui[k+1] = ui[k] + nzk; 1837 } 1838 1839 #if defined(PETSC_USE_INFO) 1840 if (ai[am] != 0) { 1841 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1842 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1843 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1844 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1845 } else { 1846 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1847 } 1848 #endif 1849 1850 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1851 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1852 ierr = PetscFree(jl);CHKERRQ(ierr); 1853 1854 /* destroy list of free space and other temporary array(s) */ 1855 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1856 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1857 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1858 1859 /* put together the new matrix in MATSEQSBAIJ format */ 1860 1861 b = (Mat_SeqSBAIJ*)(fact)->data; 1862 b->singlemalloc = PETSC_FALSE; 1863 b->free_a = PETSC_TRUE; 1864 b->free_ij = PETSC_TRUE; 1865 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1866 b->j = uj; 1867 b->i = ui; 1868 b->diag = 0; 1869 b->ilen = 0; 1870 b->imax = 0; 1871 b->row = perm; 1872 b->col = perm; 1873 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1874 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1875 b->icol = iperm; 1876 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1877 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1878 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1879 b->maxnz = b->nz = ui[am]; 1880 1881 (fact)->info.factor_mallocs = reallocs; 1882 (fact)->info.fill_ratio_given = fill; 1883 if (ai[am] != 0) { 1884 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1885 } else { 1886 (fact)->info.fill_ratio_needed = 0.0; 1887 } 1888 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1889 PetscFunctionReturn(0); 1890 } 1891