1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 252 253 PetscFunctionReturn(0); 254 #endif 255 } 256 257 EXTERN_C_BEGIN 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 261 { 262 PetscFunctionBegin; 263 *flg = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 EXTERN_C_END 267 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 272 { 273 PetscInt n = A->rmap->n; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 278 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 279 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 280 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 281 if (ftype == MAT_FACTOR_ILU) { 282 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 283 } else { 284 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 285 } 286 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 287 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 288 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 289 if (ftype == MAT_FACTOR_ICC) { 290 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 291 } else { 292 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 293 } 294 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 295 (*B)->factor = ftype; 296 PetscFunctionReturn(0); 297 } 298 EXTERN_C_END 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 302 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 303 { 304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 305 IS isicol; 306 PetscErrorCode ierr; 307 const PetscInt *r,*ic; 308 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 309 PetscInt *bi,*bj,*ajtmp; 310 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 311 PetscReal f; 312 PetscInt nlnk,*lnk,k,**bi_ptr; 313 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 314 PetscBT lnkbt; 315 316 PetscFunctionBegin; 317 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 318 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 319 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 320 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 321 322 /* get new row pointers */ 323 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 324 bi[0] = 0; 325 326 /* bdiag is location of diagonal in factor */ 327 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 328 bdiag[0] = 0; 329 330 /* linked list for storing column indices of the active row */ 331 nlnk = n + 1; 332 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 333 334 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 335 336 /* initial FreeSpace size is f*(ai[n]+1) */ 337 f = info->fill; 338 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 339 current_space = free_space; 340 341 for (i=0; i<n; i++) { 342 /* copy previous fill into linked list */ 343 nzi = 0; 344 nnz = ai[r[i]+1] - ai[r[i]]; 345 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 346 ajtmp = aj + ai[r[i]]; 347 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 348 nzi += nlnk; 349 350 /* add pivot rows into linked list */ 351 row = lnk[n]; 352 while (row < i) { 353 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 354 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 355 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 356 nzi += nlnk; 357 row = lnk[row]; 358 } 359 bi[i+1] = bi[i] + nzi; 360 im[i] = nzi; 361 362 /* mark bdiag */ 363 nzbd = 0; 364 nnz = nzi; 365 k = lnk[n]; 366 while (nnz-- && k < i){ 367 nzbd++; 368 k = lnk[k]; 369 } 370 bdiag[i] = bi[i] + nzbd; 371 372 /* if free space is not available, make more free space */ 373 if (current_space->local_remaining<nzi) { 374 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 375 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 376 reallocs++; 377 } 378 379 /* copy data into free space, then initialize lnk */ 380 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 381 bi_ptr[i] = current_space->array; 382 current_space->array += nzi; 383 current_space->local_used += nzi; 384 current_space->local_remaining -= nzi; 385 } 386 #if defined(PETSC_USE_INFO) 387 if (ai[n] != 0) { 388 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 389 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 390 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 391 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 392 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 393 } else { 394 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 395 } 396 #endif 397 398 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 399 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 400 401 /* destroy list of free space and other temporary array(s) */ 402 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 403 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 404 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 405 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 406 407 /* put together the new matrix */ 408 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 409 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 410 b = (Mat_SeqAIJ*)(B)->data; 411 b->free_a = PETSC_TRUE; 412 b->free_ij = PETSC_TRUE; 413 b->singlemalloc = PETSC_FALSE; 414 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 415 b->j = bj; 416 b->i = bi; 417 b->diag = bdiag; 418 b->ilen = 0; 419 b->imax = 0; 420 b->row = isrow; 421 b->col = iscol; 422 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 423 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 424 b->icol = isicol; 425 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 426 427 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 428 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 429 b->maxnz = b->nz = bi[n] ; 430 431 (B)->factor = MAT_FACTOR_LU; 432 (B)->info.factor_mallocs = reallocs; 433 (B)->info.fill_ratio_given = f; 434 435 if (ai[n] != 0) { 436 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 437 } else { 438 (B)->info.fill_ratio_needed = 0.0; 439 } 440 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 441 (B)->ops->solve = MatSolve_SeqAIJ; 442 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 443 /* switch to inodes if appropriate */ 444 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 /* 449 Trouble in factorization, should we dump the original matrix? 450 */ 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatFactorDumpMatrix" 453 PetscErrorCode MatFactorDumpMatrix(Mat A) 454 { 455 PetscErrorCode ierr; 456 PetscTruth flg; 457 458 PetscFunctionBegin; 459 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 460 if (flg) { 461 PetscViewer viewer; 462 char filename[PETSC_MAX_PATH_LEN]; 463 464 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 465 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 466 ierr = MatView(A,viewer);CHKERRQ(ierr); 467 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 473 474 /* ----------------------------------------------------------- */ 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 477 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 478 { 479 Mat C=B; 480 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 481 IS isrow = b->row,isicol = b->icol; 482 PetscErrorCode ierr; 483 const PetscInt *r,*ic,*ics; 484 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 485 PetscInt *ajtmp,*bjtmp,nz,row; 486 PetscInt *diag_offset = b->diag,diag,*pj; 487 PetscScalar *rtmp,*pc,multiplier,*rtmps; 488 MatScalar *v,*pv; 489 PetscScalar d; 490 PetscReal rs; 491 LUShift_Ctx sctx; 492 PetscInt newshift,*ddiag; 493 494 PetscFunctionBegin; 495 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 496 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 497 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 498 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 499 rtmps = rtmp; ics = ic; 500 501 sctx.shift_top = 0; 502 sctx.nshift_max = 0; 503 sctx.shift_lo = 0; 504 sctx.shift_hi = 0; 505 sctx.shift_fraction = 0; 506 507 /* if both shift schemes are chosen by user, only use info->shiftpd */ 508 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 509 PetscInt *aai = a->i; 510 ddiag = a->diag; 511 sctx.shift_top = 0; 512 for (i=0; i<n; i++) { 513 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 514 d = (a->a)[ddiag[i]]; 515 rs = -PetscAbsScalar(d) - PetscRealPart(d); 516 v = a->a+aai[i]; 517 nz = aai[i+1] - aai[i]; 518 for (j=0; j<nz; j++) 519 rs += PetscAbsScalar(v[j]); 520 if (rs>sctx.shift_top) sctx.shift_top = rs; 521 } 522 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 523 sctx.shift_top *= 1.1; 524 sctx.nshift_max = 5; 525 sctx.shift_lo = 0.; 526 sctx.shift_hi = 1.; 527 } 528 529 sctx.shift_amount = 0; 530 sctx.nshift = 0; 531 do { 532 sctx.lushift = PETSC_FALSE; 533 for (i=0; i<n; i++){ 534 nz = bi[i+1] - bi[i]; 535 bjtmp = bj + bi[i]; 536 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 537 538 /* load in initial (unfactored row) */ 539 nz = a->i[r[i]+1] - a->i[r[i]]; 540 ajtmp = a->j + a->i[r[i]]; 541 v = a->a + a->i[r[i]]; 542 for (j=0; j<nz; j++) { 543 rtmp[ics[ajtmp[j]]] = v[j]; 544 } 545 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 546 547 row = *bjtmp++; 548 while (row < i) { 549 pc = rtmp + row; 550 if (*pc != 0.0) { 551 pv = b->a + diag_offset[row]; 552 pj = b->j + diag_offset[row] + 1; 553 multiplier = *pc / *pv++; 554 *pc = multiplier; 555 nz = bi[row+1] - diag_offset[row] - 1; 556 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 557 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 558 } 559 row = *bjtmp++; 560 } 561 /* finished row so stick it into b->a */ 562 pv = b->a + bi[i] ; 563 pj = b->j + bi[i] ; 564 nz = bi[i+1] - bi[i]; 565 diag = diag_offset[i] - bi[i]; 566 rs = 0.0; 567 for (j=0; j<nz; j++) { 568 pv[j] = rtmps[pj[j]]; 569 if (j != diag) rs += PetscAbsScalar(pv[j]); 570 } 571 572 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 573 sctx.rs = rs; 574 sctx.pv = pv[diag]; 575 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 576 if (newshift == 1) break; 577 } 578 579 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 580 /* 581 * if no shift in this attempt & shifting & started shifting & can refine, 582 * then try lower shift 583 */ 584 sctx.shift_hi = sctx.shift_fraction; 585 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 586 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 587 sctx.lushift = PETSC_TRUE; 588 sctx.nshift++; 589 } 590 } while (sctx.lushift); 591 592 /* invert diagonal entries for simplier triangular solves */ 593 for (i=0; i<n; i++) { 594 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 595 } 596 ierr = PetscFree(rtmp);CHKERRQ(ierr); 597 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 598 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 599 if (b->inode.use) { 600 C->ops->solve = MatSolve_Inode; 601 } else { 602 PetscTruth row_identity, col_identity; 603 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 604 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 605 if (row_identity && col_identity) { 606 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 607 } else { 608 C->ops->solve = MatSolve_SeqAIJ; 609 } 610 } 611 C->ops->solveadd = MatSolveAdd_SeqAIJ; 612 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 613 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 614 C->ops->matsolve = MatMatSolve_SeqAIJ; 615 C->assembled = PETSC_TRUE; 616 C->preallocated = PETSC_TRUE; 617 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 618 if (sctx.nshift){ 619 if (info->shiftpd) { 620 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 621 } else if (info->shiftnz) { 622 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 /* 629 This routine implements inplace ILU(0) with row or/and column permutations. 630 Input: 631 A - original matrix 632 Output; 633 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 634 a->j (col index) is permuted by the inverse of colperm, then sorted 635 a->a reordered accordingly with a->j 636 a->diag (ptr to diagonal elements) is updated. 637 */ 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 640 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 641 { 642 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 643 IS isrow = a->row,isicol = a->icol; 644 PetscErrorCode ierr; 645 const PetscInt *r,*ic,*ics; 646 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 647 PetscInt *ajtmp,nz,row; 648 PetscInt *diag = a->diag,nbdiag,*pj; 649 PetscScalar *rtmp,*pc,multiplier,d; 650 MatScalar *v,*pv; 651 PetscReal rs; 652 LUShift_Ctx sctx; 653 PetscInt newshift; 654 655 PetscFunctionBegin; 656 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 657 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 658 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 659 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 660 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 661 ics = ic; 662 663 sctx.shift_top = 0; 664 sctx.nshift_max = 0; 665 sctx.shift_lo = 0; 666 sctx.shift_hi = 0; 667 sctx.shift_fraction = 0; 668 669 /* if both shift schemes are chosen by user, only use info->shiftpd */ 670 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 671 sctx.shift_top = 0; 672 for (i=0; i<n; i++) { 673 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 674 d = (a->a)[diag[i]]; 675 rs = -PetscAbsScalar(d) - PetscRealPart(d); 676 v = a->a+ai[i]; 677 nz = ai[i+1] - ai[i]; 678 for (j=0; j<nz; j++) 679 rs += PetscAbsScalar(v[j]); 680 if (rs>sctx.shift_top) sctx.shift_top = rs; 681 } 682 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 683 sctx.shift_top *= 1.1; 684 sctx.nshift_max = 5; 685 sctx.shift_lo = 0.; 686 sctx.shift_hi = 1.; 687 } 688 689 sctx.shift_amount = 0; 690 sctx.nshift = 0; 691 do { 692 sctx.lushift = PETSC_FALSE; 693 for (i=0; i<n; i++){ 694 /* load in initial unfactored row */ 695 nz = ai[r[i]+1] - ai[r[i]]; 696 ajtmp = aj + ai[r[i]]; 697 v = a->a + ai[r[i]]; 698 /* sort permuted ajtmp and values v accordingly */ 699 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 700 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 701 702 diag[r[i]] = ai[r[i]]; 703 for (j=0; j<nz; j++) { 704 rtmp[ajtmp[j]] = v[j]; 705 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 706 } 707 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 708 709 row = *ajtmp++; 710 while (row < i) { 711 pc = rtmp + row; 712 if (*pc != 0.0) { 713 pv = a->a + diag[r[row]]; 714 pj = aj + diag[r[row]] + 1; 715 716 multiplier = *pc / *pv++; 717 *pc = multiplier; 718 nz = ai[r[row]+1] - diag[r[row]] - 1; 719 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 720 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 721 } 722 row = *ajtmp++; 723 } 724 /* finished row so overwrite it onto a->a */ 725 pv = a->a + ai[r[i]] ; 726 pj = aj + ai[r[i]] ; 727 nz = ai[r[i]+1] - ai[r[i]]; 728 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 729 730 rs = 0.0; 731 for (j=0; j<nz; j++) { 732 pv[j] = rtmp[pj[j]]; 733 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 734 } 735 736 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 737 sctx.rs = rs; 738 sctx.pv = pv[nbdiag]; 739 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 740 if (newshift == 1) break; 741 } 742 743 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 744 /* 745 * if no shift in this attempt & shifting & started shifting & can refine, 746 * then try lower shift 747 */ 748 sctx.shift_hi = sctx.shift_fraction; 749 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 750 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 751 sctx.lushift = PETSC_TRUE; 752 sctx.nshift++; 753 } 754 } while (sctx.lushift); 755 756 /* invert diagonal entries for simplier triangular solves */ 757 for (i=0; i<n; i++) { 758 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 759 } 760 761 ierr = PetscFree(rtmp);CHKERRQ(ierr); 762 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 763 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 764 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 765 A->ops->solveadd = MatSolveAdd_SeqAIJ; 766 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 767 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 768 A->assembled = PETSC_TRUE; 769 A->preallocated = PETSC_TRUE; 770 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 771 if (sctx.nshift){ 772 if (info->shiftpd) { 773 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 774 } else if (info->shiftnz) { 775 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 776 } 777 } 778 PetscFunctionReturn(0); 779 } 780 781 /* ----------------------------------------------------------- */ 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatLUFactor_SeqAIJ" 784 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 785 { 786 PetscErrorCode ierr; 787 Mat C; 788 789 PetscFunctionBegin; 790 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 791 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 792 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 793 A->ops->solve = C->ops->solve; 794 A->ops->solvetranspose = C->ops->solvetranspose; 795 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 796 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 /* ----------------------------------------------------------- */ 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatSolve_SeqAIJ" 802 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 803 { 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 805 IS iscol = a->col,isrow = a->row; 806 PetscErrorCode ierr; 807 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 808 PetscInt nz; 809 const PetscInt *rout,*cout,*r,*c; 810 PetscScalar *x,*tmp,*tmps,sum; 811 const PetscScalar *b; 812 const MatScalar *aa = a->a,*v; 813 814 PetscFunctionBegin; 815 if (!n) PetscFunctionReturn(0); 816 817 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 tmp = a->solve_work; 820 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 823 824 /* forward solve the lower triangular */ 825 tmp[0] = b[*r++]; 826 tmps = tmp; 827 for (i=1; i<n; i++) { 828 v = aa + ai[i] ; 829 vi = aj + ai[i] ; 830 nz = a->diag[i] - ai[i]; 831 sum = b[*r++]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 tmp[i] = sum; 834 } 835 836 /* backward solve the upper triangular */ 837 for (i=n-1; i>=0; i--){ 838 v = aa + a->diag[i] + 1; 839 vi = aj + a->diag[i] + 1; 840 nz = ai[i+1] - a->diag[i] - 1; 841 sum = tmp[i]; 842 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 843 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 844 } 845 846 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 847 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 848 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 849 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 850 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatMatSolve_SeqAIJ" 856 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 859 IS iscol = a->col,isrow = a->row; 860 PetscErrorCode ierr; 861 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 862 PetscInt nz,neq; 863 const PetscInt *rout,*cout,*r,*c; 864 PetscScalar *x,*b,*tmp,*tmps,sum; 865 const MatScalar *aa = a->a,*v; 866 PetscTruth bisdense,xisdense; 867 868 PetscFunctionBegin; 869 if (!n) PetscFunctionReturn(0); 870 871 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 872 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 873 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 874 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 875 876 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 877 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 878 879 tmp = a->solve_work; 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 882 883 for (neq=0; neq<B->cmap->n; neq++){ 884 /* forward solve the lower triangular */ 885 tmp[0] = b[r[0]]; 886 tmps = tmp; 887 for (i=1; i<n; i++) { 888 v = aa + ai[i] ; 889 vi = aj + ai[i] ; 890 nz = a->diag[i] - ai[i]; 891 sum = b[r[i]]; 892 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 893 tmp[i] = sum; 894 } 895 /* backward solve the upper triangular */ 896 for (i=n-1; i>=0; i--){ 897 v = aa + a->diag[i] + 1; 898 vi = aj + a->diag[i] + 1; 899 nz = ai[i+1] - a->diag[i] - 1; 900 sum = tmp[i]; 901 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 902 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 903 } 904 905 b += n; 906 x += n; 907 } 908 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 909 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 910 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 911 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 912 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 918 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 919 { 920 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 921 IS iscol = a->col,isrow = a->row; 922 PetscErrorCode ierr; 923 const PetscInt *r,*c,*rout,*cout; 924 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 925 PetscInt nz,row; 926 PetscScalar *x,*b,*tmp,*tmps,sum; 927 const MatScalar *aa = a->a,*v; 928 929 PetscFunctionBegin; 930 if (!n) PetscFunctionReturn(0); 931 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 tmp = a->solve_work; 935 936 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 937 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 938 939 /* forward solve the lower triangular */ 940 tmp[0] = b[*r++]; 941 tmps = tmp; 942 for (row=1; row<n; row++) { 943 i = rout[row]; /* permuted row */ 944 v = aa + ai[i] ; 945 vi = aj + ai[i] ; 946 nz = a->diag[i] - ai[i]; 947 sum = b[*r++]; 948 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 949 tmp[row] = sum; 950 } 951 952 /* backward solve the upper triangular */ 953 for (row=n-1; row>=0; row--){ 954 i = rout[row]; /* permuted row */ 955 v = aa + a->diag[i] + 1; 956 vi = aj + a->diag[i] + 1; 957 nz = ai[i+1] - a->diag[i] - 1; 958 sum = tmp[row]; 959 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 960 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 961 } 962 963 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 964 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 965 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 966 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 967 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 /* ----------------------------------------------------------- */ 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 974 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 975 { 976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 977 PetscErrorCode ierr; 978 PetscInt n = A->rmap->n; 979 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 980 PetscScalar *x; 981 const PetscScalar *b; 982 const MatScalar *aa = a->a; 983 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 984 PetscInt adiag_i,i,nz,ai_i; 985 const MatScalar *v; 986 PetscScalar sum; 987 #endif 988 989 PetscFunctionBegin; 990 if (!n) PetscFunctionReturn(0); 991 992 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 993 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 994 995 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 996 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 997 #else 998 /* forward solve the lower triangular */ 999 x[0] = b[0]; 1000 for (i=1; i<n; i++) { 1001 ai_i = ai[i]; 1002 v = aa + ai_i; 1003 vi = aj + ai_i; 1004 nz = adiag[i] - ai_i; 1005 sum = b[i]; 1006 while (nz--) sum -= *v++ * x[*vi++]; 1007 x[i] = sum; 1008 } 1009 1010 /* backward solve the upper triangular */ 1011 for (i=n-1; i>=0; i--){ 1012 adiag_i = adiag[i]; 1013 v = aa + adiag_i + 1; 1014 vi = aj + adiag_i + 1; 1015 nz = ai[i+1] - adiag_i - 1; 1016 sum = x[i]; 1017 while (nz--) sum -= *v++ * x[*vi++]; 1018 x[i] = sum*aa[adiag_i]; 1019 } 1020 #endif 1021 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1022 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1023 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1029 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1030 { 1031 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1032 IS iscol = a->col,isrow = a->row; 1033 PetscErrorCode ierr; 1034 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1035 PetscInt nz; 1036 const PetscInt *rout,*cout,*r,*c; 1037 PetscScalar *x,*b,*tmp,sum; 1038 const MatScalar *aa = a->a,*v; 1039 1040 PetscFunctionBegin; 1041 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1042 1043 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 tmp = a->solve_work; 1046 1047 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1048 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1049 1050 /* forward solve the lower triangular */ 1051 tmp[0] = b[*r++]; 1052 for (i=1; i<n; i++) { 1053 v = aa + ai[i] ; 1054 vi = aj + ai[i] ; 1055 nz = a->diag[i] - ai[i]; 1056 sum = b[*r++]; 1057 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1058 tmp[i] = sum; 1059 } 1060 1061 /* backward solve the upper triangular */ 1062 for (i=n-1; i>=0; i--){ 1063 v = aa + a->diag[i] + 1; 1064 vi = aj + a->diag[i] + 1; 1065 nz = ai[i+1] - a->diag[i] - 1; 1066 sum = tmp[i]; 1067 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1068 tmp[i] = sum*aa[a->diag[i]]; 1069 x[*c--] += tmp[i]; 1070 } 1071 1072 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1076 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 /* -------------------------------------------------------------------*/ 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1083 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1084 { 1085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1086 IS iscol = a->col,isrow = a->row; 1087 PetscErrorCode ierr; 1088 const PetscInt *rout,*cout,*r,*c; 1089 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1090 PetscInt nz,*diag = a->diag; 1091 PetscScalar *x,*b,*tmp,s1; 1092 const MatScalar *aa = a->a,*v; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1096 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1097 tmp = a->solve_work; 1098 1099 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1100 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1101 1102 /* copy the b into temp work space according to permutation */ 1103 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1104 1105 /* forward solve the U^T */ 1106 for (i=0; i<n; i++) { 1107 v = aa + diag[i] ; 1108 vi = aj + diag[i] + 1; 1109 nz = ai[i+1] - diag[i] - 1; 1110 s1 = tmp[i]; 1111 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1112 while (nz--) { 1113 tmp[*vi++ ] -= (*v++)*s1; 1114 } 1115 tmp[i] = s1; 1116 } 1117 1118 /* backward solve the L^T */ 1119 for (i=n-1; i>=0; i--){ 1120 v = aa + diag[i] - 1 ; 1121 vi = aj + diag[i] - 1 ; 1122 nz = diag[i] - ai[i]; 1123 s1 = tmp[i]; 1124 while (nz--) { 1125 tmp[*vi-- ] -= (*v--)*s1; 1126 } 1127 } 1128 1129 /* copy tmp into x according to permutation */ 1130 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1131 1132 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1133 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1134 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1135 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1136 1137 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1143 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1144 { 1145 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1146 IS iscol = a->col,isrow = a->row; 1147 PetscErrorCode ierr; 1148 const PetscInt *r,*c,*rout,*cout; 1149 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1150 PetscInt nz,*diag = a->diag; 1151 PetscScalar *x,*b,*tmp; 1152 const MatScalar *aa = a->a,*v; 1153 1154 PetscFunctionBegin; 1155 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1156 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 tmp = a->solve_work; 1160 1161 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1162 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1163 1164 /* copy the b into temp work space according to permutation */ 1165 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1166 1167 /* forward solve the U^T */ 1168 for (i=0; i<n; i++) { 1169 v = aa + diag[i] ; 1170 vi = aj + diag[i] + 1; 1171 nz = ai[i+1] - diag[i] - 1; 1172 tmp[i] *= *v++; 1173 while (nz--) { 1174 tmp[*vi++ ] -= (*v++)*tmp[i]; 1175 } 1176 } 1177 1178 /* backward solve the L^T */ 1179 for (i=n-1; i>=0; i--){ 1180 v = aa + diag[i] - 1 ; 1181 vi = aj + diag[i] - 1 ; 1182 nz = diag[i] - ai[i]; 1183 while (nz--) { 1184 tmp[*vi-- ] -= (*v--)*tmp[i]; 1185 } 1186 } 1187 1188 /* copy tmp into x according to permutation */ 1189 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1190 1191 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1192 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1193 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1194 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1195 1196 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 /* ----------------------------------------------------------------*/ 1200 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1201 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1205 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1206 { 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1208 IS isicol; 1209 PetscErrorCode ierr; 1210 const PetscInt *r,*ic; 1211 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1212 PetscInt *bi,*cols,nnz,*cols_lvl; 1213 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1214 PetscInt i,levels,diagonal_fill; 1215 PetscTruth col_identity,row_identity; 1216 PetscReal f; 1217 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1218 PetscBT lnkbt; 1219 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1220 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1221 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1222 PetscTruth missing; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1226 f = info->fill; 1227 levels = (PetscInt)info->levels; 1228 diagonal_fill = (PetscInt)info->diagonal_fill; 1229 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1230 1231 /* special case that simply copies fill pattern */ 1232 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1233 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1234 if (!levels && row_identity && col_identity) { 1235 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1236 fact->factor = MAT_FACTOR_ILU; 1237 (fact)->info.factor_mallocs = 0; 1238 (fact)->info.fill_ratio_given = info->fill; 1239 (fact)->info.fill_ratio_needed = 1.0; 1240 b = (Mat_SeqAIJ*)(fact)->data; 1241 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1242 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1243 b->row = isrow; 1244 b->col = iscol; 1245 b->icol = isicol; 1246 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1247 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1248 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1249 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1250 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1255 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1256 1257 /* get new row pointers */ 1258 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1259 bi[0] = 0; 1260 /* bdiag is location of diagonal in factor */ 1261 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1262 bdiag[0] = 0; 1263 1264 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1265 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1266 1267 /* create a linked list for storing column indices of the active row */ 1268 nlnk = n + 1; 1269 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1270 1271 /* initial FreeSpace size is f*(ai[n]+1) */ 1272 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1273 current_space = free_space; 1274 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1275 current_space_lvl = free_space_lvl; 1276 1277 for (i=0; i<n; i++) { 1278 nzi = 0; 1279 /* copy current row into linked list */ 1280 nnz = ai[r[i]+1] - ai[r[i]]; 1281 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1282 cols = aj + ai[r[i]]; 1283 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1284 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1285 nzi += nlnk; 1286 1287 /* make sure diagonal entry is included */ 1288 if (diagonal_fill && lnk[i] == -1) { 1289 fm = n; 1290 while (lnk[fm] < i) fm = lnk[fm]; 1291 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1292 lnk[fm] = i; 1293 lnk_lvl[i] = 0; 1294 nzi++; dcount++; 1295 } 1296 1297 /* add pivot rows into the active row */ 1298 nzbd = 0; 1299 prow = lnk[n]; 1300 while (prow < i) { 1301 nnz = bdiag[prow]; 1302 cols = bj_ptr[prow] + nnz + 1; 1303 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1304 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1305 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1306 nzi += nlnk; 1307 prow = lnk[prow]; 1308 nzbd++; 1309 } 1310 bdiag[i] = nzbd; 1311 bi[i+1] = bi[i] + nzi; 1312 1313 /* if free space is not available, make more free space */ 1314 if (current_space->local_remaining<nzi) { 1315 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1316 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1317 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1318 reallocs++; 1319 } 1320 1321 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1322 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1323 bj_ptr[i] = current_space->array; 1324 bjlvl_ptr[i] = current_space_lvl->array; 1325 1326 /* make sure the active row i has diagonal entry */ 1327 if (*(bj_ptr[i]+bdiag[i]) != i) { 1328 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1329 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1330 } 1331 1332 current_space->array += nzi; 1333 current_space->local_used += nzi; 1334 current_space->local_remaining -= nzi; 1335 current_space_lvl->array += nzi; 1336 current_space_lvl->local_used += nzi; 1337 current_space_lvl->local_remaining -= nzi; 1338 } 1339 1340 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1341 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1342 1343 /* destroy list of free space and other temporary arrays */ 1344 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1345 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1346 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1347 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1348 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1349 1350 #if defined(PETSC_USE_INFO) 1351 { 1352 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1353 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1354 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1355 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1356 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1357 if (diagonal_fill) { 1358 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1359 } 1360 } 1361 #endif 1362 1363 /* put together the new matrix */ 1364 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1365 b = (Mat_SeqAIJ*)(fact)->data; 1366 b->free_a = PETSC_TRUE; 1367 b->free_ij = PETSC_TRUE; 1368 b->singlemalloc = PETSC_FALSE; 1369 len = (bi[n] )*sizeof(PetscScalar); 1370 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1371 b->j = bj; 1372 b->i = bi; 1373 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1374 b->diag = bdiag; 1375 b->ilen = 0; 1376 b->imax = 0; 1377 b->row = isrow; 1378 b->col = iscol; 1379 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1380 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1381 b->icol = isicol; 1382 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1383 /* In b structure: Free imax, ilen, old a, old j. 1384 Allocate bdiag, solve_work, new a, new j */ 1385 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1386 b->maxnz = b->nz = bi[n] ; 1387 (fact)->info.factor_mallocs = reallocs; 1388 (fact)->info.fill_ratio_given = f; 1389 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1390 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1391 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1392 PetscFunctionReturn(0); 1393 } 1394 1395 #include "src/mat/impls/sbaij/seq/sbaij.h" 1396 #undef __FUNCT__ 1397 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1398 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1399 { 1400 Mat C = B; 1401 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1402 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1403 IS ip=b->row,iip = b->icol; 1404 PetscErrorCode ierr; 1405 const PetscInt *rip,*riip; 1406 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1407 PetscInt *ai=a->i,*aj=a->j; 1408 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1409 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1410 PetscReal zeropivot,rs,shiftnz; 1411 PetscReal shiftpd; 1412 ChShift_Ctx sctx; 1413 PetscInt newshift; 1414 PetscTruth perm_identity; 1415 1416 PetscFunctionBegin; 1417 1418 shiftnz = info->shiftnz; 1419 shiftpd = info->shiftpd; 1420 zeropivot = info->zeropivot; 1421 1422 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1423 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1424 1425 /* initialization */ 1426 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1427 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1428 jl = il + mbs; 1429 rtmp = (MatScalar*)(jl + mbs); 1430 1431 sctx.shift_amount = 0; 1432 sctx.nshift = 0; 1433 do { 1434 sctx.chshift = PETSC_FALSE; 1435 for (i=0; i<mbs; i++) { 1436 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1437 } 1438 1439 for (k = 0; k<mbs; k++){ 1440 bval = ba + bi[k]; 1441 /* initialize k-th row by the perm[k]-th row of A */ 1442 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1443 for (j = jmin; j < jmax; j++){ 1444 col = riip[aj[j]]; 1445 if (col >= k){ /* only take upper triangular entry */ 1446 rtmp[col] = aa[j]; 1447 *bval++ = 0.0; /* for in-place factorization */ 1448 } 1449 } 1450 /* shift the diagonal of the matrix */ 1451 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1452 1453 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1454 dk = rtmp[k]; 1455 i = jl[k]; /* first row to be added to k_th row */ 1456 1457 while (i < k){ 1458 nexti = jl[i]; /* next row to be added to k_th row */ 1459 1460 /* compute multiplier, update diag(k) and U(i,k) */ 1461 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1462 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1463 dk += uikdi*ba[ili]; 1464 ba[ili] = uikdi; /* -U(i,k) */ 1465 1466 /* add multiple of row i to k-th row */ 1467 jmin = ili + 1; jmax = bi[i+1]; 1468 if (jmin < jmax){ 1469 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1470 /* update il and jl for row i */ 1471 il[i] = jmin; 1472 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1473 } 1474 i = nexti; 1475 } 1476 1477 /* shift the diagonals when zero pivot is detected */ 1478 /* compute rs=sum of abs(off-diagonal) */ 1479 rs = 0.0; 1480 jmin = bi[k]+1; 1481 nz = bi[k+1] - jmin; 1482 bcol = bj + jmin; 1483 while (nz--){ 1484 rs += PetscAbsScalar(rtmp[*bcol]); 1485 bcol++; 1486 } 1487 1488 sctx.rs = rs; 1489 sctx.pv = dk; 1490 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1491 1492 if (newshift == 1) { 1493 if (!sctx.shift_amount) { 1494 sctx.shift_amount = 1e-5; 1495 } 1496 break; 1497 } 1498 1499 /* copy data into U(k,:) */ 1500 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1501 jmin = bi[k]+1; jmax = bi[k+1]; 1502 if (jmin < jmax) { 1503 for (j=jmin; j<jmax; j++){ 1504 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1505 } 1506 /* add the k-th row into il and jl */ 1507 il[k] = jmin; 1508 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1509 } 1510 } 1511 } while (sctx.chshift); 1512 ierr = PetscFree(il);CHKERRQ(ierr); 1513 1514 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1515 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1516 1517 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1518 if (perm_identity){ 1519 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1520 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1521 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1522 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1523 } else { 1524 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1525 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1526 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1527 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1528 } 1529 1530 C->assembled = PETSC_TRUE; 1531 C->preallocated = PETSC_TRUE; 1532 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1533 if (sctx.nshift){ 1534 if (shiftnz) { 1535 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1536 } else if (shiftpd) { 1537 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1538 } 1539 } 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1545 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1546 { 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1548 Mat_SeqSBAIJ *b; 1549 PetscErrorCode ierr; 1550 PetscTruth perm_identity,missing; 1551 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1552 const PetscInt *rip,*riip; 1553 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1554 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1555 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1556 PetscReal fill=info->fill,levels=info->levels; 1557 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1558 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1559 PetscBT lnkbt; 1560 IS iperm; 1561 1562 PetscFunctionBegin; 1563 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1564 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1565 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1566 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1567 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1568 1569 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1570 ui[0] = 0; 1571 1572 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1573 if (!levels && perm_identity) { 1574 1575 for (i=0; i<am; i++) { 1576 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1577 } 1578 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1579 cols = uj; 1580 for (i=0; i<am; i++) { 1581 aj = a->j + a->diag[i]; 1582 ncols = ui[i+1] - ui[i]; 1583 for (j=0; j<ncols; j++) *cols++ = *aj++; 1584 } 1585 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1586 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1587 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1588 1589 /* initialization */ 1590 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1591 1592 /* jl: linked list for storing indices of the pivot rows 1593 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1594 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1595 il = jl + am; 1596 uj_ptr = (PetscInt**)(il + am); 1597 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1598 for (i=0; i<am; i++){ 1599 jl[i] = am; il[i] = 0; 1600 } 1601 1602 /* create and initialize a linked list for storing column indices of the active row k */ 1603 nlnk = am + 1; 1604 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1605 1606 /* initial FreeSpace size is fill*(ai[am]+1) */ 1607 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1608 current_space = free_space; 1609 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1610 current_space_lvl = free_space_lvl; 1611 1612 for (k=0; k<am; k++){ /* for each active row k */ 1613 /* initialize lnk by the column indices of row rip[k] of A */ 1614 nzk = 0; 1615 ncols = ai[rip[k]+1] - ai[rip[k]]; 1616 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1617 ncols_upper = 0; 1618 for (j=0; j<ncols; j++){ 1619 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1620 if (riip[i] >= k){ /* only take upper triangular entry */ 1621 ajtmp[ncols_upper] = i; 1622 ncols_upper++; 1623 } 1624 } 1625 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1626 nzk += nlnk; 1627 1628 /* update lnk by computing fill-in for each pivot row to be merged in */ 1629 prow = jl[k]; /* 1st pivot row */ 1630 1631 while (prow < k){ 1632 nextprow = jl[prow]; 1633 1634 /* merge prow into k-th row */ 1635 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1636 jmax = ui[prow+1]; 1637 ncols = jmax-jmin; 1638 i = jmin - ui[prow]; 1639 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1640 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1641 j = *(uj - 1); 1642 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1643 nzk += nlnk; 1644 1645 /* update il and jl for prow */ 1646 if (jmin < jmax){ 1647 il[prow] = jmin; 1648 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1649 } 1650 prow = nextprow; 1651 } 1652 1653 /* if free space is not available, make more free space */ 1654 if (current_space->local_remaining<nzk) { 1655 i = am - k + 1; /* num of unfactored rows */ 1656 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1657 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1658 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1659 reallocs++; 1660 } 1661 1662 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1663 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1664 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1665 1666 /* add the k-th row into il and jl */ 1667 if (nzk > 1){ 1668 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1669 jl[k] = jl[i]; jl[i] = k; 1670 il[k] = ui[k] + 1; 1671 } 1672 uj_ptr[k] = current_space->array; 1673 uj_lvl_ptr[k] = current_space_lvl->array; 1674 1675 current_space->array += nzk; 1676 current_space->local_used += nzk; 1677 current_space->local_remaining -= nzk; 1678 1679 current_space_lvl->array += nzk; 1680 current_space_lvl->local_used += nzk; 1681 current_space_lvl->local_remaining -= nzk; 1682 1683 ui[k+1] = ui[k] + nzk; 1684 } 1685 1686 #if defined(PETSC_USE_INFO) 1687 if (ai[am] != 0) { 1688 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1689 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1690 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1691 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1692 } else { 1693 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1694 } 1695 #endif 1696 1697 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1698 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1699 ierr = PetscFree(jl);CHKERRQ(ierr); 1700 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1701 1702 /* destroy list of free space and other temporary array(s) */ 1703 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1704 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1705 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1706 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1707 1708 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1709 1710 /* put together the new matrix in MATSEQSBAIJ format */ 1711 1712 b = (Mat_SeqSBAIJ*)(fact)->data; 1713 b->singlemalloc = PETSC_FALSE; 1714 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1715 b->j = uj; 1716 b->i = ui; 1717 b->diag = 0; 1718 b->ilen = 0; 1719 b->imax = 0; 1720 b->row = perm; 1721 b->col = perm; 1722 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1723 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1724 b->icol = iperm; 1725 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1726 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1727 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1728 b->maxnz = b->nz = ui[am]; 1729 b->free_a = PETSC_TRUE; 1730 b->free_ij = PETSC_TRUE; 1731 1732 (fact)->info.factor_mallocs = reallocs; 1733 (fact)->info.fill_ratio_given = fill; 1734 if (ai[am] != 0) { 1735 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1736 } else { 1737 (fact)->info.fill_ratio_needed = 0.0; 1738 } 1739 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 #undef __FUNCT__ 1744 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1745 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1746 { 1747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1748 Mat_SeqSBAIJ *b; 1749 PetscErrorCode ierr; 1750 PetscTruth perm_identity; 1751 PetscReal fill = info->fill; 1752 const PetscInt *rip,*riip; 1753 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1754 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1755 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1756 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1757 PetscBT lnkbt; 1758 IS iperm; 1759 1760 PetscFunctionBegin; 1761 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1762 /* check whether perm is the identity mapping */ 1763 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1764 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1765 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1766 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1767 1768 /* initialization */ 1769 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1770 ui[0] = 0; 1771 1772 /* jl: linked list for storing indices of the pivot rows 1773 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1774 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1775 il = jl + am; 1776 cols = il + am; 1777 ui_ptr = (PetscInt**)(cols + am); 1778 for (i=0; i<am; i++){ 1779 jl[i] = am; il[i] = 0; 1780 } 1781 1782 /* create and initialize a linked list for storing column indices of the active row k */ 1783 nlnk = am + 1; 1784 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1785 1786 /* initial FreeSpace size is fill*(ai[am]+1) */ 1787 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1788 current_space = free_space; 1789 1790 for (k=0; k<am; k++){ /* for each active row k */ 1791 /* initialize lnk by the column indices of row rip[k] of A */ 1792 nzk = 0; 1793 ncols = ai[rip[k]+1] - ai[rip[k]]; 1794 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1795 ncols_upper = 0; 1796 for (j=0; j<ncols; j++){ 1797 i = riip[*(aj + ai[rip[k]] + j)]; 1798 if (i >= k){ /* only take upper triangular entry */ 1799 cols[ncols_upper] = i; 1800 ncols_upper++; 1801 } 1802 } 1803 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1804 nzk += nlnk; 1805 1806 /* update lnk by computing fill-in for each pivot row to be merged in */ 1807 prow = jl[k]; /* 1st pivot row */ 1808 1809 while (prow < k){ 1810 nextprow = jl[prow]; 1811 /* merge prow into k-th row */ 1812 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1813 jmax = ui[prow+1]; 1814 ncols = jmax-jmin; 1815 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1816 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1817 nzk += nlnk; 1818 1819 /* update il and jl for prow */ 1820 if (jmin < jmax){ 1821 il[prow] = jmin; 1822 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1823 } 1824 prow = nextprow; 1825 } 1826 1827 /* if free space is not available, make more free space */ 1828 if (current_space->local_remaining<nzk) { 1829 i = am - k + 1; /* num of unfactored rows */ 1830 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1831 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1832 reallocs++; 1833 } 1834 1835 /* copy data into free space, then initialize lnk */ 1836 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1837 1838 /* add the k-th row into il and jl */ 1839 if (nzk-1 > 0){ 1840 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1841 jl[k] = jl[i]; jl[i] = k; 1842 il[k] = ui[k] + 1; 1843 } 1844 ui_ptr[k] = current_space->array; 1845 current_space->array += nzk; 1846 current_space->local_used += nzk; 1847 current_space->local_remaining -= nzk; 1848 1849 ui[k+1] = ui[k] + nzk; 1850 } 1851 1852 #if defined(PETSC_USE_INFO) 1853 if (ai[am] != 0) { 1854 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1855 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1856 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1857 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1858 } else { 1859 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1860 } 1861 #endif 1862 1863 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1864 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1865 ierr = PetscFree(jl);CHKERRQ(ierr); 1866 1867 /* destroy list of free space and other temporary array(s) */ 1868 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1869 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1870 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1871 1872 /* put together the new matrix in MATSEQSBAIJ format */ 1873 1874 b = (Mat_SeqSBAIJ*)(fact)->data; 1875 b->singlemalloc = PETSC_FALSE; 1876 b->free_a = PETSC_TRUE; 1877 b->free_ij = PETSC_TRUE; 1878 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1879 b->j = uj; 1880 b->i = ui; 1881 b->diag = 0; 1882 b->ilen = 0; 1883 b->imax = 0; 1884 b->row = perm; 1885 b->col = perm; 1886 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1887 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1888 b->icol = iperm; 1889 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1890 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1891 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1892 b->maxnz = b->nz = ui[am]; 1893 1894 (fact)->info.factor_mallocs = reallocs; 1895 (fact)->info.fill_ratio_given = fill; 1896 if (ai[am] != 0) { 1897 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1898 } else { 1899 (fact)->info.fill_ratio_needed = 0.0; 1900 } 1901 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1902 PetscFunctionReturn(0); 1903 } 1904