1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 252 253 PetscFunctionReturn(0); 254 #endif 255 } 256 257 EXTERN_C_BEGIN 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 261 { 262 PetscFunctionBegin; 263 *flg = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 EXTERN_C_END 267 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 272 { 273 PetscInt n = A->rmap->n; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 278 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 279 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 280 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 281 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 282 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 283 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 284 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 285 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 286 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 287 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 288 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 289 (*B)->factor = ftype; 290 PetscFunctionReturn(0); 291 } 292 EXTERN_C_END 293 294 #undef __FUNCT__ 295 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 296 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 297 { 298 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 299 IS isicol; 300 PetscErrorCode ierr; 301 const PetscInt *r,*ic; 302 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (B)->factor = MAT_FACTOR_LU; 426 (B)->info.factor_mallocs = reallocs; 427 (B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (B)->info.fill_ratio_needed = 0.0; 433 } 434 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (B)->ops->solve = MatSolve_SeqAIJ; 436 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 472 { 473 Mat C=B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 const PetscInt *r,*ic,*ics; 478 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 479 PetscInt *ajtmp,*bjtmp,nz,row; 480 PetscInt *diag_offset = b->diag,diag,*pj; 481 PetscScalar *rtmp,*pc,multiplier,*rtmps; 482 MatScalar *v,*pv; 483 PetscScalar d; 484 PetscReal rs; 485 LUShift_Ctx sctx; 486 PetscInt newshift,*ddiag; 487 488 PetscFunctionBegin; 489 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 490 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 491 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 492 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 493 rtmps = rtmp; ics = ic; 494 495 sctx.shift_top = 0; 496 sctx.nshift_max = 0; 497 sctx.shift_lo = 0; 498 sctx.shift_hi = 0; 499 sctx.shift_fraction = 0; 500 501 /* if both shift schemes are chosen by user, only use info->shiftpd */ 502 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 503 PetscInt *aai = a->i; 504 ddiag = a->diag; 505 sctx.shift_top = 0; 506 for (i=0; i<n; i++) { 507 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 508 d = (a->a)[ddiag[i]]; 509 rs = -PetscAbsScalar(d) - PetscRealPart(d); 510 v = a->a+aai[i]; 511 nz = aai[i+1] - aai[i]; 512 for (j=0; j<nz; j++) 513 rs += PetscAbsScalar(v[j]); 514 if (rs>sctx.shift_top) sctx.shift_top = rs; 515 } 516 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 517 sctx.shift_top *= 1.1; 518 sctx.nshift_max = 5; 519 sctx.shift_lo = 0.; 520 sctx.shift_hi = 1.; 521 } 522 523 sctx.shift_amount = 0; 524 sctx.nshift = 0; 525 do { 526 sctx.lushift = PETSC_FALSE; 527 for (i=0; i<n; i++){ 528 nz = bi[i+1] - bi[i]; 529 bjtmp = bj + bi[i]; 530 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 531 532 /* load in initial (unfactored row) */ 533 nz = a->i[r[i]+1] - a->i[r[i]]; 534 ajtmp = a->j + a->i[r[i]]; 535 v = a->a + a->i[r[i]]; 536 for (j=0; j<nz; j++) { 537 rtmp[ics[ajtmp[j]]] = v[j]; 538 } 539 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 540 541 row = *bjtmp++; 542 while (row < i) { 543 pc = rtmp + row; 544 if (*pc != 0.0) { 545 pv = b->a + diag_offset[row]; 546 pj = b->j + diag_offset[row] + 1; 547 multiplier = *pc / *pv++; 548 *pc = multiplier; 549 nz = bi[row+1] - diag_offset[row] - 1; 550 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 551 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 552 } 553 row = *bjtmp++; 554 } 555 /* finished row so stick it into b->a */ 556 pv = b->a + bi[i] ; 557 pj = b->j + bi[i] ; 558 nz = bi[i+1] - bi[i]; 559 diag = diag_offset[i] - bi[i]; 560 rs = 0.0; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmps[pj[j]]; 563 if (j != diag) rs += PetscAbsScalar(pv[j]); 564 } 565 566 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 567 sctx.rs = rs; 568 sctx.pv = pv[diag]; 569 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 570 if (newshift == 1) break; 571 } 572 573 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 574 /* 575 * if no shift in this attempt & shifting & started shifting & can refine, 576 * then try lower shift 577 */ 578 sctx.shift_hi = sctx.shift_fraction; 579 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 580 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 581 sctx.lushift = PETSC_TRUE; 582 sctx.nshift++; 583 } 584 } while (sctx.lushift); 585 586 /* invert diagonal entries for simplier triangular solves */ 587 for (i=0; i<n; i++) { 588 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 589 } 590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 593 if (b->inode.use) { 594 C->ops->solve = MatSolve_Inode; 595 } else { 596 PetscTruth row_identity, col_identity; 597 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 598 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 599 if (row_identity && col_identity) { 600 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 601 } else { 602 C->ops->solve = MatSolve_SeqAIJ; 603 } 604 } 605 C->ops->solveadd = MatSolveAdd_SeqAIJ; 606 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 607 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 608 C->ops->matsolve = MatMatSolve_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 612 if (sctx.nshift){ 613 if (info->shiftpd) { 614 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 615 } else if (info->shiftnz) { 616 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 617 } 618 } 619 PetscFunctionReturn(0); 620 } 621 622 /* 623 This routine implements inplace ILU(0) with row or/and column permutations. 624 Input: 625 A - original matrix 626 Output; 627 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 628 a->j (col index) is permuted by the inverse of colperm, then sorted 629 a->a reordered accordingly with a->j 630 a->diag (ptr to diagonal elements) is updated. 631 */ 632 #undef __FUNCT__ 633 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 634 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 635 { 636 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 637 IS isrow = a->row,isicol = a->icol; 638 PetscErrorCode ierr; 639 const PetscInt *r,*ic,*ics; 640 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 641 PetscInt *ajtmp,nz,row; 642 PetscInt *diag = a->diag,nbdiag,*pj; 643 PetscScalar *rtmp,*pc,multiplier,d; 644 MatScalar *v,*pv; 645 PetscReal rs; 646 LUShift_Ctx sctx; 647 PetscInt newshift; 648 649 PetscFunctionBegin; 650 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 651 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 652 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 653 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 654 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 655 ics = ic; 656 657 sctx.shift_top = 0; 658 sctx.nshift_max = 0; 659 sctx.shift_lo = 0; 660 sctx.shift_hi = 0; 661 sctx.shift_fraction = 0; 662 663 /* if both shift schemes are chosen by user, only use info->shiftpd */ 664 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 665 sctx.shift_top = 0; 666 for (i=0; i<n; i++) { 667 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 668 d = (a->a)[diag[i]]; 669 rs = -PetscAbsScalar(d) - PetscRealPart(d); 670 v = a->a+ai[i]; 671 nz = ai[i+1] - ai[i]; 672 for (j=0; j<nz; j++) 673 rs += PetscAbsScalar(v[j]); 674 if (rs>sctx.shift_top) sctx.shift_top = rs; 675 } 676 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 677 sctx.shift_top *= 1.1; 678 sctx.nshift_max = 5; 679 sctx.shift_lo = 0.; 680 sctx.shift_hi = 1.; 681 } 682 683 sctx.shift_amount = 0; 684 sctx.nshift = 0; 685 do { 686 sctx.lushift = PETSC_FALSE; 687 for (i=0; i<n; i++){ 688 /* load in initial unfactored row */ 689 nz = ai[r[i]+1] - ai[r[i]]; 690 ajtmp = aj + ai[r[i]]; 691 v = a->a + ai[r[i]]; 692 /* sort permuted ajtmp and values v accordingly */ 693 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 694 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 695 696 diag[r[i]] = ai[r[i]]; 697 for (j=0; j<nz; j++) { 698 rtmp[ajtmp[j]] = v[j]; 699 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 700 } 701 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 702 703 row = *ajtmp++; 704 while (row < i) { 705 pc = rtmp + row; 706 if (*pc != 0.0) { 707 pv = a->a + diag[r[row]]; 708 pj = aj + diag[r[row]] + 1; 709 710 multiplier = *pc / *pv++; 711 *pc = multiplier; 712 nz = ai[r[row]+1] - diag[r[row]] - 1; 713 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 714 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 715 } 716 row = *ajtmp++; 717 } 718 /* finished row so overwrite it onto a->a */ 719 pv = a->a + ai[r[i]] ; 720 pj = aj + ai[r[i]] ; 721 nz = ai[r[i]+1] - ai[r[i]]; 722 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 723 724 rs = 0.0; 725 for (j=0; j<nz; j++) { 726 pv[j] = rtmp[pj[j]]; 727 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 728 } 729 730 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 731 sctx.rs = rs; 732 sctx.pv = pv[nbdiag]; 733 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 734 if (newshift == 1) break; 735 } 736 737 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 738 /* 739 * if no shift in this attempt & shifting & started shifting & can refine, 740 * then try lower shift 741 */ 742 sctx.shift_hi = sctx.shift_fraction; 743 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 744 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 745 sctx.lushift = PETSC_TRUE; 746 sctx.nshift++; 747 } 748 } while (sctx.lushift); 749 750 /* invert diagonal entries for simplier triangular solves */ 751 for (i=0; i<n; i++) { 752 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 753 } 754 755 ierr = PetscFree(rtmp);CHKERRQ(ierr); 756 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 757 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 758 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 759 A->ops->solveadd = MatSolveAdd_SeqAIJ; 760 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 761 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 762 A->assembled = PETSC_TRUE; 763 A->preallocated = PETSC_TRUE; 764 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 765 if (sctx.nshift){ 766 if (info->shiftpd) { 767 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 768 } else if (info->shiftnz) { 769 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 770 } 771 } 772 PetscFunctionReturn(0); 773 } 774 775 /* ----------------------------------------------------------- */ 776 #undef __FUNCT__ 777 #define __FUNCT__ "MatLUFactor_SeqAIJ" 778 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 779 { 780 PetscErrorCode ierr; 781 Mat C; 782 783 PetscFunctionBegin; 784 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 785 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 786 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 787 A->ops->solve = C->ops->solve; 788 A->ops->solvetranspose = C->ops->solvetranspose; 789 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 790 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 /* ----------------------------------------------------------- */ 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatSolve_SeqAIJ" 796 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 797 { 798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 799 IS iscol = a->col,isrow = a->row; 800 PetscErrorCode ierr; 801 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 802 PetscInt nz; 803 const PetscInt *rout,*cout,*r,*c; 804 PetscScalar *x,*tmp,*tmps,sum; 805 const PetscScalar *b; 806 const MatScalar *aa = a->a,*v; 807 808 PetscFunctionBegin; 809 if (!n) PetscFunctionReturn(0); 810 811 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 812 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 813 tmp = a->solve_work; 814 815 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 816 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 817 818 /* forward solve the lower triangular */ 819 tmp[0] = b[*r++]; 820 tmps = tmp; 821 for (i=1; i<n; i++) { 822 v = aa + ai[i] ; 823 vi = aj + ai[i] ; 824 nz = a->diag[i] - ai[i]; 825 sum = b[*r++]; 826 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 827 tmp[i] = sum; 828 } 829 830 /* backward solve the upper triangular */ 831 for (i=n-1; i>=0; i--){ 832 v = aa + a->diag[i] + 1; 833 vi = aj + a->diag[i] + 1; 834 nz = ai[i+1] - a->diag[i] - 1; 835 sum = tmp[i]; 836 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 837 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 838 } 839 840 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 841 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 842 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 843 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 844 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatMatSolve_SeqAIJ" 850 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 853 IS iscol = a->col,isrow = a->row; 854 PetscErrorCode ierr; 855 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 856 PetscInt nz,neq; 857 const PetscInt *rout,*cout,*r,*c; 858 PetscScalar *x,*b,*tmp,*tmps,sum; 859 const MatScalar *aa = a->a,*v; 860 PetscTruth bisdense,xisdense; 861 862 PetscFunctionBegin; 863 if (!n) PetscFunctionReturn(0); 864 865 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 866 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 867 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 868 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 869 870 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 871 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 872 873 tmp = a->solve_work; 874 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 875 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 876 877 for (neq=0; neq<B->cmap->n; neq++){ 878 /* forward solve the lower triangular */ 879 tmp[0] = b[r[0]]; 880 tmps = tmp; 881 for (i=1; i<n; i++) { 882 v = aa + ai[i] ; 883 vi = aj + ai[i] ; 884 nz = a->diag[i] - ai[i]; 885 sum = b[r[i]]; 886 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 887 tmp[i] = sum; 888 } 889 /* backward solve the upper triangular */ 890 for (i=n-1; i>=0; i--){ 891 v = aa + a->diag[i] + 1; 892 vi = aj + a->diag[i] + 1; 893 nz = ai[i+1] - a->diag[i] - 1; 894 sum = tmp[i]; 895 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 896 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 897 } 898 899 b += n; 900 x += n; 901 } 902 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 903 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 904 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 905 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 906 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 #undef __FUNCT__ 911 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 912 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 913 { 914 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 915 IS iscol = a->col,isrow = a->row; 916 PetscErrorCode ierr; 917 const PetscInt *r,*c,*rout,*cout; 918 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 919 PetscInt nz,row; 920 PetscScalar *x,*b,*tmp,*tmps,sum; 921 const MatScalar *aa = a->a,*v; 922 923 PetscFunctionBegin; 924 if (!n) PetscFunctionReturn(0); 925 926 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 927 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 928 tmp = a->solve_work; 929 930 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 931 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 932 933 /* forward solve the lower triangular */ 934 tmp[0] = b[*r++]; 935 tmps = tmp; 936 for (row=1; row<n; row++) { 937 i = rout[row]; /* permuted row */ 938 v = aa + ai[i] ; 939 vi = aj + ai[i] ; 940 nz = a->diag[i] - ai[i]; 941 sum = b[*r++]; 942 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 943 tmp[row] = sum; 944 } 945 946 /* backward solve the upper triangular */ 947 for (row=n-1; row>=0; row--){ 948 i = rout[row]; /* permuted row */ 949 v = aa + a->diag[i] + 1; 950 vi = aj + a->diag[i] + 1; 951 nz = ai[i+1] - a->diag[i] - 1; 952 sum = tmp[row]; 953 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 954 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 955 } 956 957 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 958 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 959 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 960 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 961 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 /* ----------------------------------------------------------- */ 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 968 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 969 { 970 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 971 PetscErrorCode ierr; 972 PetscInt n = A->rmap->n; 973 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 974 PetscScalar *x; 975 const PetscScalar *b; 976 const MatScalar *aa = a->a; 977 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 978 PetscInt adiag_i,i,nz,ai_i; 979 const MatScalar *v; 980 PetscScalar sum; 981 #endif 982 983 PetscFunctionBegin; 984 if (!n) PetscFunctionReturn(0); 985 986 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 987 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 988 989 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 990 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 991 #else 992 /* forward solve the lower triangular */ 993 x[0] = b[0]; 994 for (i=1; i<n; i++) { 995 ai_i = ai[i]; 996 v = aa + ai_i; 997 vi = aj + ai_i; 998 nz = adiag[i] - ai_i; 999 sum = b[i]; 1000 while (nz--) sum -= *v++ * x[*vi++]; 1001 x[i] = sum; 1002 } 1003 1004 /* backward solve the upper triangular */ 1005 for (i=n-1; i>=0; i--){ 1006 adiag_i = adiag[i]; 1007 v = aa + adiag_i + 1; 1008 vi = aj + adiag_i + 1; 1009 nz = ai[i+1] - adiag_i - 1; 1010 sum = x[i]; 1011 while (nz--) sum -= *v++ * x[*vi++]; 1012 x[i] = sum*aa[adiag_i]; 1013 } 1014 #endif 1015 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1017 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1023 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1024 { 1025 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1026 IS iscol = a->col,isrow = a->row; 1027 PetscErrorCode ierr; 1028 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1029 PetscInt nz; 1030 const PetscInt *rout,*cout,*r,*c; 1031 PetscScalar *x,*b,*tmp,sum; 1032 const MatScalar *aa = a->a,*v; 1033 1034 PetscFunctionBegin; 1035 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1036 1037 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1038 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1039 tmp = a->solve_work; 1040 1041 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1042 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1043 1044 /* forward solve the lower triangular */ 1045 tmp[0] = b[*r++]; 1046 for (i=1; i<n; i++) { 1047 v = aa + ai[i] ; 1048 vi = aj + ai[i] ; 1049 nz = a->diag[i] - ai[i]; 1050 sum = b[*r++]; 1051 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1052 tmp[i] = sum; 1053 } 1054 1055 /* backward solve the upper triangular */ 1056 for (i=n-1; i>=0; i--){ 1057 v = aa + a->diag[i] + 1; 1058 vi = aj + a->diag[i] + 1; 1059 nz = ai[i+1] - a->diag[i] - 1; 1060 sum = tmp[i]; 1061 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1062 tmp[i] = sum*aa[a->diag[i]]; 1063 x[*c--] += tmp[i]; 1064 } 1065 1066 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1067 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1068 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1069 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1070 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1071 1072 PetscFunctionReturn(0); 1073 } 1074 /* -------------------------------------------------------------------*/ 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1077 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1078 { 1079 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1080 IS iscol = a->col,isrow = a->row; 1081 PetscErrorCode ierr; 1082 const PetscInt *rout,*cout,*r,*c; 1083 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1084 PetscInt nz,*diag = a->diag; 1085 PetscScalar *x,*b,*tmp,s1; 1086 const MatScalar *aa = a->a,*v; 1087 1088 PetscFunctionBegin; 1089 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1090 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1091 tmp = a->solve_work; 1092 1093 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1094 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1095 1096 /* copy the b into temp work space according to permutation */ 1097 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1098 1099 /* forward solve the U^T */ 1100 for (i=0; i<n; i++) { 1101 v = aa + diag[i] ; 1102 vi = aj + diag[i] + 1; 1103 nz = ai[i+1] - diag[i] - 1; 1104 s1 = tmp[i]; 1105 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1106 while (nz--) { 1107 tmp[*vi++ ] -= (*v++)*s1; 1108 } 1109 tmp[i] = s1; 1110 } 1111 1112 /* backward solve the L^T */ 1113 for (i=n-1; i>=0; i--){ 1114 v = aa + diag[i] - 1 ; 1115 vi = aj + diag[i] - 1 ; 1116 nz = diag[i] - ai[i]; 1117 s1 = tmp[i]; 1118 while (nz--) { 1119 tmp[*vi-- ] -= (*v--)*s1; 1120 } 1121 } 1122 1123 /* copy tmp into x according to permutation */ 1124 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1125 1126 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1127 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1128 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1129 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1130 1131 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 #undef __FUNCT__ 1136 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1137 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1138 { 1139 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1140 IS iscol = a->col,isrow = a->row; 1141 PetscErrorCode ierr; 1142 const PetscInt *r,*c,*rout,*cout; 1143 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1144 PetscInt nz,*diag = a->diag; 1145 PetscScalar *x,*b,*tmp; 1146 const MatScalar *aa = a->a,*v; 1147 1148 PetscFunctionBegin; 1149 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1150 1151 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1152 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1153 tmp = a->solve_work; 1154 1155 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1156 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1157 1158 /* copy the b into temp work space according to permutation */ 1159 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1160 1161 /* forward solve the U^T */ 1162 for (i=0; i<n; i++) { 1163 v = aa + diag[i] ; 1164 vi = aj + diag[i] + 1; 1165 nz = ai[i+1] - diag[i] - 1; 1166 tmp[i] *= *v++; 1167 while (nz--) { 1168 tmp[*vi++ ] -= (*v++)*tmp[i]; 1169 } 1170 } 1171 1172 /* backward solve the L^T */ 1173 for (i=n-1; i>=0; i--){ 1174 v = aa + diag[i] - 1 ; 1175 vi = aj + diag[i] - 1 ; 1176 nz = diag[i] - ai[i]; 1177 while (nz--) { 1178 tmp[*vi-- ] -= (*v--)*tmp[i]; 1179 } 1180 } 1181 1182 /* copy tmp into x according to permutation */ 1183 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1184 1185 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1186 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1187 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1188 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1189 1190 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1191 PetscFunctionReturn(0); 1192 } 1193 /* ----------------------------------------------------------------*/ 1194 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1195 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1196 1197 #undef __FUNCT__ 1198 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1199 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1200 { 1201 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1202 IS isicol; 1203 PetscErrorCode ierr; 1204 const PetscInt *r,*ic; 1205 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1206 PetscInt *bi,*cols,nnz,*cols_lvl; 1207 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1208 PetscInt i,levels,diagonal_fill; 1209 PetscTruth col_identity,row_identity; 1210 PetscReal f; 1211 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1212 PetscBT lnkbt; 1213 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1214 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1215 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1216 PetscTruth missing; 1217 1218 PetscFunctionBegin; 1219 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1220 f = info->fill; 1221 levels = (PetscInt)info->levels; 1222 diagonal_fill = (PetscInt)info->diagonal_fill; 1223 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1224 1225 /* special case that simply copies fill pattern */ 1226 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1227 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1228 if (!levels && row_identity && col_identity) { 1229 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1230 fact->factor = MAT_FACTOR_ILU; 1231 (fact)->info.factor_mallocs = 0; 1232 (fact)->info.fill_ratio_given = info->fill; 1233 (fact)->info.fill_ratio_needed = 1.0; 1234 b = (Mat_SeqAIJ*)(fact)->data; 1235 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1236 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1237 b->row = isrow; 1238 b->col = iscol; 1239 b->icol = isicol; 1240 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1241 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1242 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1243 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1244 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1245 PetscFunctionReturn(0); 1246 } 1247 1248 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1249 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1250 1251 /* get new row pointers */ 1252 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1253 bi[0] = 0; 1254 /* bdiag is location of diagonal in factor */ 1255 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1256 bdiag[0] = 0; 1257 1258 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1259 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1260 1261 /* create a linked list for storing column indices of the active row */ 1262 nlnk = n + 1; 1263 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1264 1265 /* initial FreeSpace size is f*(ai[n]+1) */ 1266 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1267 current_space = free_space; 1268 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1269 current_space_lvl = free_space_lvl; 1270 1271 for (i=0; i<n; i++) { 1272 nzi = 0; 1273 /* copy current row into linked list */ 1274 nnz = ai[r[i]+1] - ai[r[i]]; 1275 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1276 cols = aj + ai[r[i]]; 1277 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1278 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1279 nzi += nlnk; 1280 1281 /* make sure diagonal entry is included */ 1282 if (diagonal_fill && lnk[i] == -1) { 1283 fm = n; 1284 while (lnk[fm] < i) fm = lnk[fm]; 1285 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1286 lnk[fm] = i; 1287 lnk_lvl[i] = 0; 1288 nzi++; dcount++; 1289 } 1290 1291 /* add pivot rows into the active row */ 1292 nzbd = 0; 1293 prow = lnk[n]; 1294 while (prow < i) { 1295 nnz = bdiag[prow]; 1296 cols = bj_ptr[prow] + nnz + 1; 1297 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1298 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1299 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1300 nzi += nlnk; 1301 prow = lnk[prow]; 1302 nzbd++; 1303 } 1304 bdiag[i] = nzbd; 1305 bi[i+1] = bi[i] + nzi; 1306 1307 /* if free space is not available, make more free space */ 1308 if (current_space->local_remaining<nzi) { 1309 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1310 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1311 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1312 reallocs++; 1313 } 1314 1315 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1316 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1317 bj_ptr[i] = current_space->array; 1318 bjlvl_ptr[i] = current_space_lvl->array; 1319 1320 /* make sure the active row i has diagonal entry */ 1321 if (*(bj_ptr[i]+bdiag[i]) != i) { 1322 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1323 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1324 } 1325 1326 current_space->array += nzi; 1327 current_space->local_used += nzi; 1328 current_space->local_remaining -= nzi; 1329 current_space_lvl->array += nzi; 1330 current_space_lvl->local_used += nzi; 1331 current_space_lvl->local_remaining -= nzi; 1332 } 1333 1334 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1335 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1336 1337 /* destroy list of free space and other temporary arrays */ 1338 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1339 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1340 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1341 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1342 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1343 1344 #if defined(PETSC_USE_INFO) 1345 { 1346 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1347 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1348 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1349 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1350 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1351 if (diagonal_fill) { 1352 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1353 } 1354 } 1355 #endif 1356 1357 /* put together the new matrix */ 1358 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1359 b = (Mat_SeqAIJ*)(fact)->data; 1360 b->free_a = PETSC_TRUE; 1361 b->free_ij = PETSC_TRUE; 1362 b->singlemalloc = PETSC_FALSE; 1363 len = (bi[n] )*sizeof(PetscScalar); 1364 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1365 b->j = bj; 1366 b->i = bi; 1367 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1368 b->diag = bdiag; 1369 b->ilen = 0; 1370 b->imax = 0; 1371 b->row = isrow; 1372 b->col = iscol; 1373 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1374 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1375 b->icol = isicol; 1376 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1377 /* In b structure: Free imax, ilen, old a, old j. 1378 Allocate bdiag, solve_work, new a, new j */ 1379 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1380 b->maxnz = b->nz = bi[n] ; 1381 (fact)->info.factor_mallocs = reallocs; 1382 (fact)->info.fill_ratio_given = f; 1383 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1384 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1385 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1386 PetscFunctionReturn(0); 1387 } 1388 1389 #include "src/mat/impls/sbaij/seq/sbaij.h" 1390 #undef __FUNCT__ 1391 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1392 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1393 { 1394 Mat C = B; 1395 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1396 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1397 IS ip=b->row,iip = b->icol; 1398 PetscErrorCode ierr; 1399 const PetscInt *rip,*riip; 1400 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1401 PetscInt *ai=a->i,*aj=a->j; 1402 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1403 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1404 PetscReal zeropivot,rs,shiftnz; 1405 PetscReal shiftpd; 1406 ChShift_Ctx sctx; 1407 PetscInt newshift; 1408 PetscTruth perm_identity; 1409 1410 PetscFunctionBegin; 1411 1412 shiftnz = info->shiftnz; 1413 shiftpd = info->shiftpd; 1414 zeropivot = info->zeropivot; 1415 1416 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1417 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1418 1419 /* initialization */ 1420 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1421 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1422 jl = il + mbs; 1423 rtmp = (MatScalar*)(jl + mbs); 1424 1425 sctx.shift_amount = 0; 1426 sctx.nshift = 0; 1427 do { 1428 sctx.chshift = PETSC_FALSE; 1429 for (i=0; i<mbs; i++) { 1430 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1431 } 1432 1433 for (k = 0; k<mbs; k++){ 1434 bval = ba + bi[k]; 1435 /* initialize k-th row by the perm[k]-th row of A */ 1436 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1437 for (j = jmin; j < jmax; j++){ 1438 col = riip[aj[j]]; 1439 if (col >= k){ /* only take upper triangular entry */ 1440 rtmp[col] = aa[j]; 1441 *bval++ = 0.0; /* for in-place factorization */ 1442 } 1443 } 1444 /* shift the diagonal of the matrix */ 1445 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1446 1447 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1448 dk = rtmp[k]; 1449 i = jl[k]; /* first row to be added to k_th row */ 1450 1451 while (i < k){ 1452 nexti = jl[i]; /* next row to be added to k_th row */ 1453 1454 /* compute multiplier, update diag(k) and U(i,k) */ 1455 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1456 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1457 dk += uikdi*ba[ili]; 1458 ba[ili] = uikdi; /* -U(i,k) */ 1459 1460 /* add multiple of row i to k-th row */ 1461 jmin = ili + 1; jmax = bi[i+1]; 1462 if (jmin < jmax){ 1463 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1464 /* update il and jl for row i */ 1465 il[i] = jmin; 1466 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1467 } 1468 i = nexti; 1469 } 1470 1471 /* shift the diagonals when zero pivot is detected */ 1472 /* compute rs=sum of abs(off-diagonal) */ 1473 rs = 0.0; 1474 jmin = bi[k]+1; 1475 nz = bi[k+1] - jmin; 1476 bcol = bj + jmin; 1477 while (nz--){ 1478 rs += PetscAbsScalar(rtmp[*bcol]); 1479 bcol++; 1480 } 1481 1482 sctx.rs = rs; 1483 sctx.pv = dk; 1484 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1485 1486 if (newshift == 1) { 1487 if (!sctx.shift_amount) { 1488 sctx.shift_amount = 1e-5; 1489 } 1490 break; 1491 } 1492 1493 /* copy data into U(k,:) */ 1494 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1495 jmin = bi[k]+1; jmax = bi[k+1]; 1496 if (jmin < jmax) { 1497 for (j=jmin; j<jmax; j++){ 1498 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1499 } 1500 /* add the k-th row into il and jl */ 1501 il[k] = jmin; 1502 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1503 } 1504 } 1505 } while (sctx.chshift); 1506 ierr = PetscFree(il);CHKERRQ(ierr); 1507 1508 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1509 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1510 1511 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1512 if (perm_identity){ 1513 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1514 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1515 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1516 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1517 } else { 1518 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1519 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1520 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1521 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1522 } 1523 1524 C->assembled = PETSC_TRUE; 1525 C->preallocated = PETSC_TRUE; 1526 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1527 if (sctx.nshift){ 1528 if (shiftnz) { 1529 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1530 } else if (shiftpd) { 1531 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1532 } 1533 } 1534 PetscFunctionReturn(0); 1535 } 1536 1537 #undef __FUNCT__ 1538 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1539 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1540 { 1541 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1542 Mat_SeqSBAIJ *b; 1543 PetscErrorCode ierr; 1544 PetscTruth perm_identity,missing; 1545 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1546 const PetscInt *rip,*riip; 1547 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1548 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1549 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1550 PetscReal fill=info->fill,levels=info->levels; 1551 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1552 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1553 PetscBT lnkbt; 1554 IS iperm; 1555 1556 PetscFunctionBegin; 1557 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1558 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1559 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1560 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1561 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1562 1563 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1564 ui[0] = 0; 1565 1566 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1567 if (!levels && perm_identity) { 1568 1569 for (i=0; i<am; i++) { 1570 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1571 } 1572 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1573 cols = uj; 1574 for (i=0; i<am; i++) { 1575 aj = a->j + a->diag[i]; 1576 ncols = ui[i+1] - ui[i]; 1577 for (j=0; j<ncols; j++) *cols++ = *aj++; 1578 } 1579 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1580 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1581 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1582 1583 /* initialization */ 1584 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1585 1586 /* jl: linked list for storing indices of the pivot rows 1587 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1588 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1589 il = jl + am; 1590 uj_ptr = (PetscInt**)(il + am); 1591 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1592 for (i=0; i<am; i++){ 1593 jl[i] = am; il[i] = 0; 1594 } 1595 1596 /* create and initialize a linked list for storing column indices of the active row k */ 1597 nlnk = am + 1; 1598 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1599 1600 /* initial FreeSpace size is fill*(ai[am]+1) */ 1601 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1602 current_space = free_space; 1603 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1604 current_space_lvl = free_space_lvl; 1605 1606 for (k=0; k<am; k++){ /* for each active row k */ 1607 /* initialize lnk by the column indices of row rip[k] of A */ 1608 nzk = 0; 1609 ncols = ai[rip[k]+1] - ai[rip[k]]; 1610 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1611 ncols_upper = 0; 1612 for (j=0; j<ncols; j++){ 1613 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1614 if (riip[i] >= k){ /* only take upper triangular entry */ 1615 ajtmp[ncols_upper] = i; 1616 ncols_upper++; 1617 } 1618 } 1619 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1620 nzk += nlnk; 1621 1622 /* update lnk by computing fill-in for each pivot row to be merged in */ 1623 prow = jl[k]; /* 1st pivot row */ 1624 1625 while (prow < k){ 1626 nextprow = jl[prow]; 1627 1628 /* merge prow into k-th row */ 1629 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1630 jmax = ui[prow+1]; 1631 ncols = jmax-jmin; 1632 i = jmin - ui[prow]; 1633 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1634 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1635 j = *(uj - 1); 1636 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1637 nzk += nlnk; 1638 1639 /* update il and jl for prow */ 1640 if (jmin < jmax){ 1641 il[prow] = jmin; 1642 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1643 } 1644 prow = nextprow; 1645 } 1646 1647 /* if free space is not available, make more free space */ 1648 if (current_space->local_remaining<nzk) { 1649 i = am - k + 1; /* num of unfactored rows */ 1650 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1651 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1652 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1653 reallocs++; 1654 } 1655 1656 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1657 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1658 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1659 1660 /* add the k-th row into il and jl */ 1661 if (nzk > 1){ 1662 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1663 jl[k] = jl[i]; jl[i] = k; 1664 il[k] = ui[k] + 1; 1665 } 1666 uj_ptr[k] = current_space->array; 1667 uj_lvl_ptr[k] = current_space_lvl->array; 1668 1669 current_space->array += nzk; 1670 current_space->local_used += nzk; 1671 current_space->local_remaining -= nzk; 1672 1673 current_space_lvl->array += nzk; 1674 current_space_lvl->local_used += nzk; 1675 current_space_lvl->local_remaining -= nzk; 1676 1677 ui[k+1] = ui[k] + nzk; 1678 } 1679 1680 #if defined(PETSC_USE_INFO) 1681 if (ai[am] != 0) { 1682 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1683 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1684 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1685 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1686 } else { 1687 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1688 } 1689 #endif 1690 1691 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1692 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1693 ierr = PetscFree(jl);CHKERRQ(ierr); 1694 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1695 1696 /* destroy list of free space and other temporary array(s) */ 1697 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1698 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1699 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1700 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1701 1702 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1703 1704 /* put together the new matrix in MATSEQSBAIJ format */ 1705 1706 b = (Mat_SeqSBAIJ*)(fact)->data; 1707 b->singlemalloc = PETSC_FALSE; 1708 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1709 b->j = uj; 1710 b->i = ui; 1711 b->diag = 0; 1712 b->ilen = 0; 1713 b->imax = 0; 1714 b->row = perm; 1715 b->col = perm; 1716 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1717 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1718 b->icol = iperm; 1719 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1720 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1721 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1722 b->maxnz = b->nz = ui[am]; 1723 b->free_a = PETSC_TRUE; 1724 b->free_ij = PETSC_TRUE; 1725 1726 (fact)->info.factor_mallocs = reallocs; 1727 (fact)->info.fill_ratio_given = fill; 1728 if (ai[am] != 0) { 1729 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1730 } else { 1731 (fact)->info.fill_ratio_needed = 0.0; 1732 } 1733 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1734 PetscFunctionReturn(0); 1735 } 1736 1737 #undef __FUNCT__ 1738 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1739 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1740 { 1741 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1742 Mat_SeqSBAIJ *b; 1743 PetscErrorCode ierr; 1744 PetscTruth perm_identity; 1745 PetscReal fill = info->fill; 1746 const PetscInt *rip,*riip; 1747 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1748 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1749 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1750 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1751 PetscBT lnkbt; 1752 IS iperm; 1753 1754 PetscFunctionBegin; 1755 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1756 /* check whether perm is the identity mapping */ 1757 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1758 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1759 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1760 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1761 1762 /* initialization */ 1763 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1764 ui[0] = 0; 1765 1766 /* jl: linked list for storing indices of the pivot rows 1767 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1768 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1769 il = jl + am; 1770 cols = il + am; 1771 ui_ptr = (PetscInt**)(cols + am); 1772 for (i=0; i<am; i++){ 1773 jl[i] = am; il[i] = 0; 1774 } 1775 1776 /* create and initialize a linked list for storing column indices of the active row k */ 1777 nlnk = am + 1; 1778 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1779 1780 /* initial FreeSpace size is fill*(ai[am]+1) */ 1781 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1782 current_space = free_space; 1783 1784 for (k=0; k<am; k++){ /* for each active row k */ 1785 /* initialize lnk by the column indices of row rip[k] of A */ 1786 nzk = 0; 1787 ncols = ai[rip[k]+1] - ai[rip[k]]; 1788 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1789 ncols_upper = 0; 1790 for (j=0; j<ncols; j++){ 1791 i = riip[*(aj + ai[rip[k]] + j)]; 1792 if (i >= k){ /* only take upper triangular entry */ 1793 cols[ncols_upper] = i; 1794 ncols_upper++; 1795 } 1796 } 1797 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1798 nzk += nlnk; 1799 1800 /* update lnk by computing fill-in for each pivot row to be merged in */ 1801 prow = jl[k]; /* 1st pivot row */ 1802 1803 while (prow < k){ 1804 nextprow = jl[prow]; 1805 /* merge prow into k-th row */ 1806 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1807 jmax = ui[prow+1]; 1808 ncols = jmax-jmin; 1809 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1810 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1811 nzk += nlnk; 1812 1813 /* update il and jl for prow */ 1814 if (jmin < jmax){ 1815 il[prow] = jmin; 1816 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1817 } 1818 prow = nextprow; 1819 } 1820 1821 /* if free space is not available, make more free space */ 1822 if (current_space->local_remaining<nzk) { 1823 i = am - k + 1; /* num of unfactored rows */ 1824 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1825 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1826 reallocs++; 1827 } 1828 1829 /* copy data into free space, then initialize lnk */ 1830 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1831 1832 /* add the k-th row into il and jl */ 1833 if (nzk-1 > 0){ 1834 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1835 jl[k] = jl[i]; jl[i] = k; 1836 il[k] = ui[k] + 1; 1837 } 1838 ui_ptr[k] = current_space->array; 1839 current_space->array += nzk; 1840 current_space->local_used += nzk; 1841 current_space->local_remaining -= nzk; 1842 1843 ui[k+1] = ui[k] + nzk; 1844 } 1845 1846 #if defined(PETSC_USE_INFO) 1847 if (ai[am] != 0) { 1848 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1849 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1850 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1851 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1852 } else { 1853 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1854 } 1855 #endif 1856 1857 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1858 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1859 ierr = PetscFree(jl);CHKERRQ(ierr); 1860 1861 /* destroy list of free space and other temporary array(s) */ 1862 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1863 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1864 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1865 1866 /* put together the new matrix in MATSEQSBAIJ format */ 1867 1868 b = (Mat_SeqSBAIJ*)(fact)->data; 1869 b->singlemalloc = PETSC_FALSE; 1870 b->free_a = PETSC_TRUE; 1871 b->free_ij = PETSC_TRUE; 1872 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1873 b->j = uj; 1874 b->i = ui; 1875 b->diag = 0; 1876 b->ilen = 0; 1877 b->imax = 0; 1878 b->row = perm; 1879 b->col = perm; 1880 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1881 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1882 b->icol = iperm; 1883 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1884 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1885 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1886 b->maxnz = b->nz = ui[am]; 1887 1888 (fact)->info.factor_mallocs = reallocs; 1889 (fact)->info.fill_ratio_given = fill; 1890 if (ai[am] != 0) { 1891 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1892 } else { 1893 (fact)->info.fill_ratio_needed = 0.0; 1894 } 1895 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1896 PetscFunctionReturn(0); 1897 } 1898