1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(info->dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 252 253 PetscFunctionReturn(0); 254 #endif 255 } 256 257 EXTERN_C_BEGIN 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 260 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 261 { 262 PetscFunctionBegin; 263 *flg = PETSC_TRUE; 264 PetscFunctionReturn(0); 265 } 266 EXTERN_C_END 267 268 EXTERN_C_BEGIN 269 #undef __FUNCT__ 270 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 271 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 272 { 273 PetscInt n = A->rmap->n; 274 PetscErrorCode ierr; 275 276 PetscFunctionBegin; 277 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 278 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 279 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 280 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 281 if (ftype == MAT_FACTOR_ILU) { 282 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 283 } else { 284 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 285 } 286 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 287 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 288 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 289 if (ftype == MAT_FACTOR_ICC) { 290 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 291 } else { 292 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 293 } 294 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 295 (*B)->factor = ftype; 296 PetscFunctionReturn(0); 297 } 298 EXTERN_C_END 299 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 302 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 303 { 304 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 305 IS isicol; 306 PetscErrorCode ierr; 307 const PetscInt *r,*ic; 308 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 309 PetscInt *bi,*bj,*ajtmp; 310 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 311 PetscReal f; 312 PetscInt nlnk,*lnk,k,**bi_ptr; 313 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 314 PetscBT lnkbt; 315 316 PetscFunctionBegin; 317 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 318 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 319 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 320 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 321 322 /* get new row pointers */ 323 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 324 bi[0] = 0; 325 326 /* bdiag is location of diagonal in factor */ 327 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 328 bdiag[0] = 0; 329 330 /* linked list for storing column indices of the active row */ 331 nlnk = n + 1; 332 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 333 334 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 335 336 /* initial FreeSpace size is f*(ai[n]+1) */ 337 f = info->fill; 338 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 339 current_space = free_space; 340 341 for (i=0; i<n; i++) { 342 /* copy previous fill into linked list */ 343 nzi = 0; 344 nnz = ai[r[i]+1] - ai[r[i]]; 345 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 346 ajtmp = aj + ai[r[i]]; 347 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 348 nzi += nlnk; 349 350 /* add pivot rows into linked list */ 351 row = lnk[n]; 352 while (row < i) { 353 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 354 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 355 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 356 nzi += nlnk; 357 row = lnk[row]; 358 } 359 bi[i+1] = bi[i] + nzi; 360 im[i] = nzi; 361 362 /* mark bdiag */ 363 nzbd = 0; 364 nnz = nzi; 365 k = lnk[n]; 366 while (nnz-- && k < i){ 367 nzbd++; 368 k = lnk[k]; 369 } 370 bdiag[i] = bi[i] + nzbd; 371 372 /* if free space is not available, make more free space */ 373 if (current_space->local_remaining<nzi) { 374 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 375 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 376 reallocs++; 377 } 378 379 /* copy data into free space, then initialize lnk */ 380 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 381 bi_ptr[i] = current_space->array; 382 current_space->array += nzi; 383 current_space->local_used += nzi; 384 current_space->local_remaining -= nzi; 385 } 386 #if defined(PETSC_USE_INFO) 387 if (ai[n] != 0) { 388 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 389 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 390 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 391 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 392 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 393 } else { 394 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 395 } 396 #endif 397 398 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 399 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 400 401 /* destroy list of free space and other temporary array(s) */ 402 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 403 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 404 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 405 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 406 407 /* put together the new matrix */ 408 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 409 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 410 b = (Mat_SeqAIJ*)(B)->data; 411 b->free_a = PETSC_TRUE; 412 b->free_ij = PETSC_TRUE; 413 b->singlemalloc = PETSC_FALSE; 414 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 415 b->j = bj; 416 b->i = bi; 417 b->diag = bdiag; 418 b->ilen = 0; 419 b->imax = 0; 420 b->row = isrow; 421 b->col = iscol; 422 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 423 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 424 b->icol = isicol; 425 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 426 427 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 428 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 429 b->maxnz = b->nz = bi[n] ; 430 431 (B)->factor = MAT_FACTOR_LU; 432 (B)->info.factor_mallocs = reallocs; 433 (B)->info.fill_ratio_given = f; 434 435 if (ai[n] != 0) { 436 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 437 } else { 438 (B)->info.fill_ratio_needed = 0.0; 439 } 440 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 441 (B)->ops->solve = MatSolve_SeqAIJ; 442 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 443 /* switch to inodes if appropriate */ 444 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 445 PetscFunctionReturn(0); 446 } 447 448 /* 449 Trouble in factorization, should we dump the original matrix? 450 */ 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatFactorDumpMatrix" 453 PetscErrorCode MatFactorDumpMatrix(Mat A) 454 { 455 PetscErrorCode ierr; 456 PetscTruth flg; 457 458 PetscFunctionBegin; 459 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 460 if (flg) { 461 PetscViewer viewer; 462 char filename[PETSC_MAX_PATH_LEN]; 463 464 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 465 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 466 ierr = MatView(A,viewer);CHKERRQ(ierr); 467 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 468 } 469 PetscFunctionReturn(0); 470 } 471 472 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 473 474 /* ----------------------------------------------------------- */ 475 #undef __FUNCT__ 476 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 477 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 478 { 479 Mat C=B; 480 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 481 IS isrow = b->row,isicol = b->icol; 482 PetscErrorCode ierr; 483 const PetscInt *r,*ic,*ics; 484 PetscInt i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 485 PetscInt *ajtmp,*bjtmp,nz,row; 486 PetscInt *diag_offset = b->diag,diag,*pj; 487 PetscScalar *rtmp,*pc,multiplier,*rtmps; 488 MatScalar *v,*pv; 489 PetscScalar d; 490 PetscReal rs; 491 LUShift_Ctx sctx; 492 PetscInt newshift,*ddiag; 493 494 PetscFunctionBegin; 495 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 496 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 497 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 498 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 499 rtmps = rtmp; ics = ic; 500 501 sctx.shift_top = 0; 502 sctx.nshift_max = 0; 503 sctx.shift_lo = 0; 504 sctx.shift_hi = 0; 505 sctx.shift_fraction = 0; 506 507 /* if both shift schemes are chosen by user, only use info->shiftpd */ 508 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 509 PetscInt *aai = a->i; 510 ddiag = a->diag; 511 sctx.shift_top = 0; 512 for (i=0; i<n; i++) { 513 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 514 d = (a->a)[ddiag[i]]; 515 rs = -PetscAbsScalar(d) - PetscRealPart(d); 516 v = a->a+aai[i]; 517 nz = aai[i+1] - aai[i]; 518 for (j=0; j<nz; j++) 519 rs += PetscAbsScalar(v[j]); 520 if (rs>sctx.shift_top) sctx.shift_top = rs; 521 } 522 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 523 sctx.shift_top *= 1.1; 524 sctx.nshift_max = 5; 525 sctx.shift_lo = 0.; 526 sctx.shift_hi = 1.; 527 } 528 529 sctx.shift_amount = 0; 530 sctx.nshift = 0; 531 do { 532 sctx.lushift = PETSC_FALSE; 533 for (i=0; i<n; i++){ 534 nz = bi[i+1] - bi[i]; 535 bjtmp = bj + bi[i]; 536 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 537 538 /* load in initial (unfactored row) */ 539 nz = a->i[r[i]+1] - a->i[r[i]]; 540 ajtmp = a->j + a->i[r[i]]; 541 v = a->a + a->i[r[i]]; 542 for (j=0; j<nz; j++) { 543 rtmp[ics[ajtmp[j]]] = v[j]; 544 } 545 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 546 547 row = *bjtmp++; 548 while (row < i) { 549 pc = rtmp + row; 550 if (*pc != 0.0) { 551 pv = b->a + diag_offset[row]; 552 pj = b->j + diag_offset[row] + 1; 553 multiplier = *pc / *pv++; 554 *pc = multiplier; 555 nz = bi[row+1] - diag_offset[row] - 1; 556 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 557 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 558 } 559 row = *bjtmp++; 560 } 561 /* finished row so stick it into b->a */ 562 pv = b->a + bi[i] ; 563 pj = b->j + bi[i] ; 564 nz = bi[i+1] - bi[i]; 565 diag = diag_offset[i] - bi[i]; 566 rs = 0.0; 567 for (j=0; j<nz; j++) { 568 pv[j] = rtmps[pj[j]]; 569 if (j != diag) rs += PetscAbsScalar(pv[j]); 570 } 571 572 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 573 sctx.rs = rs; 574 sctx.pv = pv[diag]; 575 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 576 if (newshift == 1) break; 577 } 578 579 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 580 /* 581 * if no shift in this attempt & shifting & started shifting & can refine, 582 * then try lower shift 583 */ 584 sctx.shift_hi = sctx.shift_fraction; 585 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 586 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 587 sctx.lushift = PETSC_TRUE; 588 sctx.nshift++; 589 } 590 } while (sctx.lushift); 591 592 /* invert diagonal entries for simplier triangular solves */ 593 for (i=0; i<n; i++) { 594 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 595 } 596 ierr = PetscFree(rtmp);CHKERRQ(ierr); 597 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 598 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 599 if (b->inode.use) { 600 C->ops->solve = MatSolve_Inode; 601 } else { 602 PetscTruth row_identity, col_identity; 603 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 604 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 605 if (row_identity && col_identity) { 606 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 607 } else { 608 C->ops->solve = MatSolve_SeqAIJ; 609 } 610 } 611 C->ops->solveadd = MatSolveAdd_SeqAIJ; 612 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 613 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 614 C->ops->matsolve = MatMatSolve_SeqAIJ; 615 C->assembled = PETSC_TRUE; 616 C->preallocated = PETSC_TRUE; 617 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 618 if (sctx.nshift){ 619 if (info->shiftpd) { 620 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 621 } else if (info->shiftnz) { 622 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 623 } 624 } 625 PetscFunctionReturn(0); 626 } 627 628 /* 629 This routine implements inplace ILU(0) with row or/and column permutations. 630 Input: 631 A - original matrix 632 Output; 633 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 634 a->j (col index) is permuted by the inverse of colperm, then sorted 635 a->a reordered accordingly with a->j 636 a->diag (ptr to diagonal elements) is updated. 637 */ 638 #undef __FUNCT__ 639 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 640 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 641 { 642 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 643 IS isrow = a->row,isicol = a->icol; 644 PetscErrorCode ierr; 645 const PetscInt *r,*ic,*ics; 646 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 647 PetscInt *ajtmp,nz,row; 648 PetscInt *diag = a->diag,nbdiag,*pj; 649 PetscScalar *rtmp,*pc,multiplier,d; 650 MatScalar *v,*pv; 651 PetscReal rs; 652 LUShift_Ctx sctx; 653 PetscInt newshift; 654 655 PetscFunctionBegin; 656 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 657 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 658 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 659 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 660 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 661 ics = ic; 662 663 sctx.shift_top = 0; 664 sctx.nshift_max = 0; 665 sctx.shift_lo = 0; 666 sctx.shift_hi = 0; 667 sctx.shift_fraction = 0; 668 669 /* if both shift schemes are chosen by user, only use info->shiftpd */ 670 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 671 sctx.shift_top = 0; 672 for (i=0; i<n; i++) { 673 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 674 d = (a->a)[diag[i]]; 675 rs = -PetscAbsScalar(d) - PetscRealPart(d); 676 v = a->a+ai[i]; 677 nz = ai[i+1] - ai[i]; 678 for (j=0; j<nz; j++) 679 rs += PetscAbsScalar(v[j]); 680 if (rs>sctx.shift_top) sctx.shift_top = rs; 681 } 682 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 683 sctx.shift_top *= 1.1; 684 sctx.nshift_max = 5; 685 sctx.shift_lo = 0.; 686 sctx.shift_hi = 1.; 687 } 688 689 sctx.shift_amount = 0; 690 sctx.nshift = 0; 691 do { 692 sctx.lushift = PETSC_FALSE; 693 for (i=0; i<n; i++){ 694 /* load in initial unfactored row */ 695 nz = ai[r[i]+1] - ai[r[i]]; 696 ajtmp = aj + ai[r[i]]; 697 v = a->a + ai[r[i]]; 698 /* sort permuted ajtmp and values v accordingly */ 699 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 700 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 701 702 diag[r[i]] = ai[r[i]]; 703 for (j=0; j<nz; j++) { 704 rtmp[ajtmp[j]] = v[j]; 705 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 706 } 707 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 708 709 row = *ajtmp++; 710 while (row < i) { 711 pc = rtmp + row; 712 if (*pc != 0.0) { 713 pv = a->a + diag[r[row]]; 714 pj = aj + diag[r[row]] + 1; 715 716 multiplier = *pc / *pv++; 717 *pc = multiplier; 718 nz = ai[r[row]+1] - diag[r[row]] - 1; 719 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 720 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 721 } 722 row = *ajtmp++; 723 } 724 /* finished row so overwrite it onto a->a */ 725 pv = a->a + ai[r[i]] ; 726 pj = aj + ai[r[i]] ; 727 nz = ai[r[i]+1] - ai[r[i]]; 728 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 729 730 rs = 0.0; 731 for (j=0; j<nz; j++) { 732 pv[j] = rtmp[pj[j]]; 733 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 734 } 735 736 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 737 sctx.rs = rs; 738 sctx.pv = pv[nbdiag]; 739 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 740 if (newshift == 1) break; 741 } 742 743 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 744 /* 745 * if no shift in this attempt & shifting & started shifting & can refine, 746 * then try lower shift 747 */ 748 sctx.shift_hi = sctx.shift_fraction; 749 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 750 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 751 sctx.lushift = PETSC_TRUE; 752 sctx.nshift++; 753 } 754 } while (sctx.lushift); 755 756 /* invert diagonal entries for simplier triangular solves */ 757 for (i=0; i<n; i++) { 758 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 759 } 760 761 ierr = PetscFree(rtmp);CHKERRQ(ierr); 762 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 763 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 764 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 765 A->ops->solveadd = MatSolveAdd_SeqAIJ; 766 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 767 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 768 A->assembled = PETSC_TRUE; 769 A->preallocated = PETSC_TRUE; 770 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 771 if (sctx.nshift){ 772 if (info->shiftpd) { 773 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 774 } else if (info->shiftnz) { 775 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 776 } 777 } 778 PetscFunctionReturn(0); 779 } 780 781 /* ----------------------------------------------------------- */ 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatLUFactor_SeqAIJ" 784 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 785 { 786 PetscErrorCode ierr; 787 Mat C; 788 789 PetscFunctionBegin; 790 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 791 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 792 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 793 A->ops->solve = C->ops->solve; 794 A->ops->solvetranspose = C->ops->solvetranspose; 795 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 796 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 797 PetscFunctionReturn(0); 798 } 799 /* ----------------------------------------------------------- */ 800 #undef __FUNCT__ 801 #define __FUNCT__ "MatSolve_SeqAIJ" 802 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 803 { 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 805 IS iscol = a->col,isrow = a->row; 806 PetscErrorCode ierr; 807 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 808 PetscInt nz; 809 const PetscInt *rout,*cout,*r,*c; 810 PetscScalar *x,*tmp,*tmps,sum; 811 const PetscScalar *b; 812 const MatScalar *aa = a->a,*v; 813 814 PetscFunctionBegin; 815 if (!n) PetscFunctionReturn(0); 816 817 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 tmp = a->solve_work; 820 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 823 824 /* forward solve the lower triangular */ 825 tmp[0] = b[*r++]; 826 tmps = tmp; 827 for (i=1; i<n; i++) { 828 v = aa + ai[i] ; 829 vi = aj + ai[i] ; 830 nz = a->diag[i] - ai[i]; 831 sum = b[*r++]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 tmp[i] = sum; 834 } 835 836 /* backward solve the upper triangular */ 837 for (i=n-1; i>=0; i--){ 838 v = aa + a->diag[i] + 1; 839 vi = aj + a->diag[i] + 1; 840 nz = ai[i+1] - a->diag[i] - 1; 841 sum = tmp[i]; 842 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 843 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 844 } 845 846 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 847 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 848 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 849 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 850 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "MatMatSolve_SeqAIJ" 856 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 857 { 858 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 859 IS iscol = a->col,isrow = a->row; 860 PetscErrorCode ierr; 861 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 862 PetscInt nz,neq; 863 const PetscInt *rout,*cout,*r,*c; 864 PetscScalar *x,*b,*tmp,*tmps,sum; 865 const MatScalar *aa = a->a,*v; 866 PetscTruth bisdense,xisdense; 867 868 PetscFunctionBegin; 869 if (!n) PetscFunctionReturn(0); 870 871 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 872 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 873 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 874 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 875 876 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 877 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 878 879 tmp = a->solve_work; 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 882 883 for (neq=0; neq<B->cmap->n; neq++){ 884 /* forward solve the lower triangular */ 885 tmp[0] = b[r[0]]; 886 tmps = tmp; 887 for (i=1; i<n; i++) { 888 v = aa + ai[i] ; 889 vi = aj + ai[i] ; 890 nz = a->diag[i] - ai[i]; 891 sum = b[r[i]]; 892 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 893 tmp[i] = sum; 894 } 895 /* backward solve the upper triangular */ 896 for (i=n-1; i>=0; i--){ 897 v = aa + a->diag[i] + 1; 898 vi = aj + a->diag[i] + 1; 899 nz = ai[i+1] - a->diag[i] - 1; 900 sum = tmp[i]; 901 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 902 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 903 } 904 905 b += n; 906 x += n; 907 } 908 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 909 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 910 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 911 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 912 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 913 PetscFunctionReturn(0); 914 } 915 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 918 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 919 { 920 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 921 IS iscol = a->col,isrow = a->row; 922 PetscErrorCode ierr; 923 const PetscInt *r,*c,*rout,*cout; 924 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 925 PetscInt nz,row; 926 PetscScalar *x,*b,*tmp,*tmps,sum; 927 const MatScalar *aa = a->a,*v; 928 929 PetscFunctionBegin; 930 if (!n) PetscFunctionReturn(0); 931 932 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 933 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 934 tmp = a->solve_work; 935 936 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 937 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 938 939 /* forward solve the lower triangular */ 940 tmp[0] = b[*r++]; 941 tmps = tmp; 942 for (row=1; row<n; row++) { 943 i = rout[row]; /* permuted row */ 944 v = aa + ai[i] ; 945 vi = aj + ai[i] ; 946 nz = a->diag[i] - ai[i]; 947 sum = b[*r++]; 948 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 949 tmp[row] = sum; 950 } 951 952 /* backward solve the upper triangular */ 953 for (row=n-1; row>=0; row--){ 954 i = rout[row]; /* permuted row */ 955 v = aa + a->diag[i] + 1; 956 vi = aj + a->diag[i] + 1; 957 nz = ai[i+1] - a->diag[i] - 1; 958 sum = tmp[row]; 959 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 960 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 961 } 962 963 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 964 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 965 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 966 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 967 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 968 PetscFunctionReturn(0); 969 } 970 971 /* ----------------------------------------------------------- */ 972 #undef __FUNCT__ 973 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 974 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 975 { 976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 977 PetscErrorCode ierr; 978 PetscInt n = A->rmap->n; 979 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 980 PetscScalar *x; 981 const PetscScalar *b; 982 const MatScalar *aa = a->a; 983 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 984 PetscInt adiag_i,i,nz,ai_i; 985 const MatScalar *v; 986 PetscScalar sum; 987 #endif 988 989 PetscFunctionBegin; 990 if (!n) PetscFunctionReturn(0); 991 992 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 993 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 994 995 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 996 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 997 #else 998 /* forward solve the lower triangular */ 999 x[0] = b[0]; 1000 for (i=1; i<n; i++) { 1001 ai_i = ai[i]; 1002 v = aa + ai_i; 1003 vi = aj + ai_i; 1004 nz = adiag[i] - ai_i; 1005 sum = b[i]; 1006 while (nz--) sum -= *v++ * x[*vi++]; 1007 x[i] = sum; 1008 } 1009 1010 /* backward solve the upper triangular */ 1011 for (i=n-1; i>=0; i--){ 1012 adiag_i = adiag[i]; 1013 v = aa + adiag_i + 1; 1014 vi = aj + adiag_i + 1; 1015 nz = ai[i+1] - adiag_i - 1; 1016 sum = x[i]; 1017 while (nz--) sum -= *v++ * x[*vi++]; 1018 x[i] = sum*aa[adiag_i]; 1019 } 1020 #endif 1021 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1022 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1023 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1029 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1030 { 1031 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1032 IS iscol = a->col,isrow = a->row; 1033 PetscErrorCode ierr; 1034 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1035 PetscInt nz; 1036 const PetscInt *rout,*cout,*r,*c; 1037 PetscScalar *x,*b,*tmp,sum; 1038 const MatScalar *aa = a->a,*v; 1039 1040 PetscFunctionBegin; 1041 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1042 1043 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1044 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1045 tmp = a->solve_work; 1046 1047 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1048 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1049 1050 /* forward solve the lower triangular */ 1051 tmp[0] = b[*r++]; 1052 for (i=1; i<n; i++) { 1053 v = aa + ai[i] ; 1054 vi = aj + ai[i] ; 1055 nz = a->diag[i] - ai[i]; 1056 sum = b[*r++]; 1057 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1058 tmp[i] = sum; 1059 } 1060 1061 /* backward solve the upper triangular */ 1062 for (i=n-1; i>=0; i--){ 1063 v = aa + a->diag[i] + 1; 1064 vi = aj + a->diag[i] + 1; 1065 nz = ai[i+1] - a->diag[i] - 1; 1066 sum = tmp[i]; 1067 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1068 tmp[i] = sum*aa[a->diag[i]]; 1069 x[*c--] += tmp[i]; 1070 } 1071 1072 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1076 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1077 1078 PetscFunctionReturn(0); 1079 } 1080 /* -------------------------------------------------------------------*/ 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1083 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1084 { 1085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1086 IS iscol = a->col,isrow = a->row; 1087 PetscErrorCode ierr; 1088 const PetscInt *rout,*cout,*r,*c; 1089 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1090 PetscInt nz,*diag = a->diag; 1091 PetscScalar *x,*b,*tmp,s1; 1092 const MatScalar *aa = a->a,*v; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1096 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1097 tmp = a->solve_work; 1098 1099 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1100 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1101 1102 /* copy the b into temp work space according to permutation */ 1103 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1104 1105 /* forward solve the U^T */ 1106 for (i=0; i<n; i++) { 1107 v = aa + diag[i] ; 1108 vi = aj + diag[i] + 1; 1109 nz = ai[i+1] - diag[i] - 1; 1110 s1 = tmp[i]; 1111 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1112 while (nz--) { 1113 tmp[*vi++ ] -= (*v++)*s1; 1114 } 1115 tmp[i] = s1; 1116 } 1117 1118 /* backward solve the L^T */ 1119 for (i=n-1; i>=0; i--){ 1120 v = aa + diag[i] - 1 ; 1121 vi = aj + diag[i] - 1 ; 1122 nz = diag[i] - ai[i]; 1123 s1 = tmp[i]; 1124 while (nz--) { 1125 tmp[*vi-- ] -= (*v--)*s1; 1126 } 1127 } 1128 1129 /* copy tmp into x according to permutation */ 1130 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1131 1132 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1133 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1134 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1135 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1136 1137 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1138 PetscFunctionReturn(0); 1139 } 1140 1141 #undef __FUNCT__ 1142 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1143 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1144 { 1145 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1146 IS iscol = a->col,isrow = a->row; 1147 PetscErrorCode ierr; 1148 const PetscInt *r,*c,*rout,*cout; 1149 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1150 PetscInt nz,*diag = a->diag; 1151 PetscScalar *x,*b,*tmp; 1152 const MatScalar *aa = a->a,*v; 1153 1154 PetscFunctionBegin; 1155 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1156 1157 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1158 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1159 tmp = a->solve_work; 1160 1161 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1162 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1163 1164 /* copy the b into temp work space according to permutation */ 1165 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1166 1167 /* forward solve the U^T */ 1168 for (i=0; i<n; i++) { 1169 v = aa + diag[i] ; 1170 vi = aj + diag[i] + 1; 1171 nz = ai[i+1] - diag[i] - 1; 1172 tmp[i] *= *v++; 1173 while (nz--) { 1174 tmp[*vi++ ] -= (*v++)*tmp[i]; 1175 } 1176 } 1177 1178 /* backward solve the L^T */ 1179 for (i=n-1; i>=0; i--){ 1180 v = aa + diag[i] - 1 ; 1181 vi = aj + diag[i] - 1 ; 1182 nz = diag[i] - ai[i]; 1183 while (nz--) { 1184 tmp[*vi-- ] -= (*v--)*tmp[i]; 1185 } 1186 } 1187 1188 /* copy tmp into x according to permutation */ 1189 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1190 1191 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1192 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1193 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1194 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1195 1196 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 /* ----------------------------------------------------------------*/ 1200 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1201 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1202 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1205 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1206 { 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1208 IS isicol; 1209 PetscErrorCode ierr; 1210 const PetscInt *r,*ic; 1211 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1212 PetscInt *bi,*cols,nnz,*cols_lvl; 1213 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1214 PetscInt i,levels,diagonal_fill; 1215 PetscTruth col_identity,row_identity; 1216 PetscReal f; 1217 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1218 PetscBT lnkbt; 1219 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1220 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1221 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1222 PetscTruth missing; 1223 1224 PetscFunctionBegin; 1225 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1226 f = info->fill; 1227 levels = (PetscInt)info->levels; 1228 diagonal_fill = (PetscInt)info->diagonal_fill; 1229 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1230 1231 /* special case that simply copies fill pattern */ 1232 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1233 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1234 if (!levels && row_identity && col_identity) { 1235 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1236 fact->factor = MAT_FACTOR_ILU; 1237 (fact)->info.factor_mallocs = 0; 1238 (fact)->info.fill_ratio_given = info->fill; 1239 (fact)->info.fill_ratio_needed = 1.0; 1240 b = (Mat_SeqAIJ*)(fact)->data; 1241 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1242 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1243 b->row = isrow; 1244 b->col = iscol; 1245 b->icol = isicol; 1246 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1247 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1248 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1249 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1250 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1255 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1256 1257 /* get new row pointers */ 1258 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1259 bi[0] = 0; 1260 /* bdiag is location of diagonal in factor */ 1261 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1262 bdiag[0] = 0; 1263 1264 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1265 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1266 1267 /* create a linked list for storing column indices of the active row */ 1268 nlnk = n + 1; 1269 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1270 1271 /* initial FreeSpace size is f*(ai[n]+1) */ 1272 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1273 current_space = free_space; 1274 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1275 current_space_lvl = free_space_lvl; 1276 1277 for (i=0; i<n; i++) { 1278 nzi = 0; 1279 /* copy current row into linked list */ 1280 nnz = ai[r[i]+1] - ai[r[i]]; 1281 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1282 cols = aj + ai[r[i]]; 1283 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1284 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1285 nzi += nlnk; 1286 1287 /* make sure diagonal entry is included */ 1288 if (diagonal_fill && lnk[i] == -1) { 1289 fm = n; 1290 while (lnk[fm] < i) fm = lnk[fm]; 1291 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1292 lnk[fm] = i; 1293 lnk_lvl[i] = 0; 1294 nzi++; dcount++; 1295 } 1296 1297 /* add pivot rows into the active row */ 1298 nzbd = 0; 1299 prow = lnk[n]; 1300 while (prow < i) { 1301 nnz = bdiag[prow]; 1302 cols = bj_ptr[prow] + nnz + 1; 1303 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1304 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1305 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1306 nzi += nlnk; 1307 prow = lnk[prow]; 1308 nzbd++; 1309 } 1310 bdiag[i] = nzbd; 1311 bi[i+1] = bi[i] + nzi; 1312 1313 /* if free space is not available, make more free space */ 1314 if (current_space->local_remaining<nzi) { 1315 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1316 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1317 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1318 reallocs++; 1319 } 1320 1321 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1322 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1323 bj_ptr[i] = current_space->array; 1324 bjlvl_ptr[i] = current_space_lvl->array; 1325 1326 /* make sure the active row i has diagonal entry */ 1327 if (*(bj_ptr[i]+bdiag[i]) != i) { 1328 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1329 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1330 } 1331 1332 current_space->array += nzi; 1333 current_space->local_used += nzi; 1334 current_space->local_remaining -= nzi; 1335 current_space_lvl->array += nzi; 1336 current_space_lvl->local_used += nzi; 1337 current_space_lvl->local_remaining -= nzi; 1338 } 1339 1340 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1341 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1342 1343 /* destroy list of free space and other temporary arrays */ 1344 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1345 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1346 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1347 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1348 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1349 1350 #if defined(PETSC_USE_INFO) 1351 { 1352 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1353 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1354 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1355 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1356 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1357 if (diagonal_fill) { 1358 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1359 } 1360 } 1361 #endif 1362 1363 /* put together the new matrix */ 1364 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1365 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1366 b = (Mat_SeqAIJ*)(fact)->data; 1367 b->free_a = PETSC_TRUE; 1368 b->free_ij = PETSC_TRUE; 1369 b->singlemalloc = PETSC_FALSE; 1370 len = (bi[n] )*sizeof(PetscScalar); 1371 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1372 b->j = bj; 1373 b->i = bi; 1374 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1375 b->diag = bdiag; 1376 b->ilen = 0; 1377 b->imax = 0; 1378 b->row = isrow; 1379 b->col = iscol; 1380 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1381 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1382 b->icol = isicol; 1383 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1384 /* In b structure: Free imax, ilen, old a, old j. 1385 Allocate bdiag, solve_work, new a, new j */ 1386 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1387 b->maxnz = b->nz = bi[n] ; 1388 (fact)->info.factor_mallocs = reallocs; 1389 (fact)->info.fill_ratio_given = f; 1390 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1391 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1392 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1393 PetscFunctionReturn(0); 1394 } 1395 1396 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1397 #undef __FUNCT__ 1398 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1399 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1400 { 1401 Mat C = B; 1402 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1403 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1404 IS ip=b->row,iip = b->icol; 1405 PetscErrorCode ierr; 1406 const PetscInt *rip,*riip; 1407 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1408 PetscInt *ai=a->i,*aj=a->j; 1409 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1410 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1411 PetscReal zeropivot,rs,shiftnz; 1412 PetscReal shiftpd; 1413 ChShift_Ctx sctx; 1414 PetscInt newshift; 1415 PetscTruth perm_identity; 1416 1417 PetscFunctionBegin; 1418 1419 shiftnz = info->shiftnz; 1420 shiftpd = info->shiftpd; 1421 zeropivot = info->zeropivot; 1422 1423 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1424 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1425 1426 /* initialization */ 1427 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1428 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1429 jl = il + mbs; 1430 rtmp = (MatScalar*)(jl + mbs); 1431 1432 sctx.shift_amount = 0; 1433 sctx.nshift = 0; 1434 do { 1435 sctx.chshift = PETSC_FALSE; 1436 for (i=0; i<mbs; i++) { 1437 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1438 } 1439 1440 for (k = 0; k<mbs; k++){ 1441 bval = ba + bi[k]; 1442 /* initialize k-th row by the perm[k]-th row of A */ 1443 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1444 for (j = jmin; j < jmax; j++){ 1445 col = riip[aj[j]]; 1446 if (col >= k){ /* only take upper triangular entry */ 1447 rtmp[col] = aa[j]; 1448 *bval++ = 0.0; /* for in-place factorization */ 1449 } 1450 } 1451 /* shift the diagonal of the matrix */ 1452 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1453 1454 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1455 dk = rtmp[k]; 1456 i = jl[k]; /* first row to be added to k_th row */ 1457 1458 while (i < k){ 1459 nexti = jl[i]; /* next row to be added to k_th row */ 1460 1461 /* compute multiplier, update diag(k) and U(i,k) */ 1462 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1463 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1464 dk += uikdi*ba[ili]; 1465 ba[ili] = uikdi; /* -U(i,k) */ 1466 1467 /* add multiple of row i to k-th row */ 1468 jmin = ili + 1; jmax = bi[i+1]; 1469 if (jmin < jmax){ 1470 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1471 /* update il and jl for row i */ 1472 il[i] = jmin; 1473 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1474 } 1475 i = nexti; 1476 } 1477 1478 /* shift the diagonals when zero pivot is detected */ 1479 /* compute rs=sum of abs(off-diagonal) */ 1480 rs = 0.0; 1481 jmin = bi[k]+1; 1482 nz = bi[k+1] - jmin; 1483 bcol = bj + jmin; 1484 while (nz--){ 1485 rs += PetscAbsScalar(rtmp[*bcol]); 1486 bcol++; 1487 } 1488 1489 sctx.rs = rs; 1490 sctx.pv = dk; 1491 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1492 1493 if (newshift == 1) { 1494 if (!sctx.shift_amount) { 1495 sctx.shift_amount = 1e-5; 1496 } 1497 break; 1498 } 1499 1500 /* copy data into U(k,:) */ 1501 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1502 jmin = bi[k]+1; jmax = bi[k+1]; 1503 if (jmin < jmax) { 1504 for (j=jmin; j<jmax; j++){ 1505 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1506 } 1507 /* add the k-th row into il and jl */ 1508 il[k] = jmin; 1509 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1510 } 1511 } 1512 } while (sctx.chshift); 1513 ierr = PetscFree(il);CHKERRQ(ierr); 1514 1515 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1516 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1517 1518 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1519 if (perm_identity){ 1520 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1521 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1522 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1523 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1524 } else { 1525 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1526 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1527 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1528 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1529 } 1530 1531 C->assembled = PETSC_TRUE; 1532 C->preallocated = PETSC_TRUE; 1533 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1534 if (sctx.nshift){ 1535 if (shiftnz) { 1536 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1537 } else if (shiftpd) { 1538 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1539 } 1540 } 1541 PetscFunctionReturn(0); 1542 } 1543 1544 #undef __FUNCT__ 1545 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1546 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1547 { 1548 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1549 Mat_SeqSBAIJ *b; 1550 PetscErrorCode ierr; 1551 PetscTruth perm_identity,missing; 1552 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1553 const PetscInt *rip,*riip; 1554 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1555 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1556 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1557 PetscReal fill=info->fill,levels=info->levels; 1558 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1559 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1560 PetscBT lnkbt; 1561 IS iperm; 1562 1563 PetscFunctionBegin; 1564 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1565 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1566 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1567 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1568 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1569 1570 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1571 ui[0] = 0; 1572 1573 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1574 if (!levels && perm_identity) { 1575 1576 for (i=0; i<am; i++) { 1577 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1578 } 1579 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1580 cols = uj; 1581 for (i=0; i<am; i++) { 1582 aj = a->j + a->diag[i]; 1583 ncols = ui[i+1] - ui[i]; 1584 for (j=0; j<ncols; j++) *cols++ = *aj++; 1585 } 1586 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1587 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1588 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1589 1590 /* initialization */ 1591 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1592 1593 /* jl: linked list for storing indices of the pivot rows 1594 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1595 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1596 il = jl + am; 1597 uj_ptr = (PetscInt**)(il + am); 1598 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1599 for (i=0; i<am; i++){ 1600 jl[i] = am; il[i] = 0; 1601 } 1602 1603 /* create and initialize a linked list for storing column indices of the active row k */ 1604 nlnk = am + 1; 1605 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1606 1607 /* initial FreeSpace size is fill*(ai[am]+1) */ 1608 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1609 current_space = free_space; 1610 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1611 current_space_lvl = free_space_lvl; 1612 1613 for (k=0; k<am; k++){ /* for each active row k */ 1614 /* initialize lnk by the column indices of row rip[k] of A */ 1615 nzk = 0; 1616 ncols = ai[rip[k]+1] - ai[rip[k]]; 1617 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1618 ncols_upper = 0; 1619 for (j=0; j<ncols; j++){ 1620 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1621 if (riip[i] >= k){ /* only take upper triangular entry */ 1622 ajtmp[ncols_upper] = i; 1623 ncols_upper++; 1624 } 1625 } 1626 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1627 nzk += nlnk; 1628 1629 /* update lnk by computing fill-in for each pivot row to be merged in */ 1630 prow = jl[k]; /* 1st pivot row */ 1631 1632 while (prow < k){ 1633 nextprow = jl[prow]; 1634 1635 /* merge prow into k-th row */ 1636 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1637 jmax = ui[prow+1]; 1638 ncols = jmax-jmin; 1639 i = jmin - ui[prow]; 1640 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1641 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1642 j = *(uj - 1); 1643 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1644 nzk += nlnk; 1645 1646 /* update il and jl for prow */ 1647 if (jmin < jmax){ 1648 il[prow] = jmin; 1649 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1650 } 1651 prow = nextprow; 1652 } 1653 1654 /* if free space is not available, make more free space */ 1655 if (current_space->local_remaining<nzk) { 1656 i = am - k + 1; /* num of unfactored rows */ 1657 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1658 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1659 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1660 reallocs++; 1661 } 1662 1663 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1664 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1665 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1666 1667 /* add the k-th row into il and jl */ 1668 if (nzk > 1){ 1669 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1670 jl[k] = jl[i]; jl[i] = k; 1671 il[k] = ui[k] + 1; 1672 } 1673 uj_ptr[k] = current_space->array; 1674 uj_lvl_ptr[k] = current_space_lvl->array; 1675 1676 current_space->array += nzk; 1677 current_space->local_used += nzk; 1678 current_space->local_remaining -= nzk; 1679 1680 current_space_lvl->array += nzk; 1681 current_space_lvl->local_used += nzk; 1682 current_space_lvl->local_remaining -= nzk; 1683 1684 ui[k+1] = ui[k] + nzk; 1685 } 1686 1687 #if defined(PETSC_USE_INFO) 1688 if (ai[am] != 0) { 1689 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1690 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1691 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1692 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1693 } else { 1694 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1695 } 1696 #endif 1697 1698 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1699 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1700 ierr = PetscFree(jl);CHKERRQ(ierr); 1701 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1702 1703 /* destroy list of free space and other temporary array(s) */ 1704 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1705 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1706 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1707 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1708 1709 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1710 1711 /* put together the new matrix in MATSEQSBAIJ format */ 1712 1713 b = (Mat_SeqSBAIJ*)(fact)->data; 1714 b->singlemalloc = PETSC_FALSE; 1715 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1716 b->j = uj; 1717 b->i = ui; 1718 b->diag = 0; 1719 b->ilen = 0; 1720 b->imax = 0; 1721 b->row = perm; 1722 b->col = perm; 1723 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1724 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1725 b->icol = iperm; 1726 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1727 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1728 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1729 b->maxnz = b->nz = ui[am]; 1730 b->free_a = PETSC_TRUE; 1731 b->free_ij = PETSC_TRUE; 1732 1733 (fact)->info.factor_mallocs = reallocs; 1734 (fact)->info.fill_ratio_given = fill; 1735 if (ai[am] != 0) { 1736 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1737 } else { 1738 (fact)->info.fill_ratio_needed = 0.0; 1739 } 1740 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 #undef __FUNCT__ 1745 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1746 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1747 { 1748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1749 Mat_SeqSBAIJ *b; 1750 PetscErrorCode ierr; 1751 PetscTruth perm_identity; 1752 PetscReal fill = info->fill; 1753 const PetscInt *rip,*riip; 1754 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1755 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1756 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1757 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1758 PetscBT lnkbt; 1759 IS iperm; 1760 1761 PetscFunctionBegin; 1762 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1763 /* check whether perm is the identity mapping */ 1764 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1765 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1766 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1767 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1768 1769 /* initialization */ 1770 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1771 ui[0] = 0; 1772 1773 /* jl: linked list for storing indices of the pivot rows 1774 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1775 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1776 il = jl + am; 1777 cols = il + am; 1778 ui_ptr = (PetscInt**)(cols + am); 1779 for (i=0; i<am; i++){ 1780 jl[i] = am; il[i] = 0; 1781 } 1782 1783 /* create and initialize a linked list for storing column indices of the active row k */ 1784 nlnk = am + 1; 1785 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1786 1787 /* initial FreeSpace size is fill*(ai[am]+1) */ 1788 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1789 current_space = free_space; 1790 1791 for (k=0; k<am; k++){ /* for each active row k */ 1792 /* initialize lnk by the column indices of row rip[k] of A */ 1793 nzk = 0; 1794 ncols = ai[rip[k]+1] - ai[rip[k]]; 1795 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1796 ncols_upper = 0; 1797 for (j=0; j<ncols; j++){ 1798 i = riip[*(aj + ai[rip[k]] + j)]; 1799 if (i >= k){ /* only take upper triangular entry */ 1800 cols[ncols_upper] = i; 1801 ncols_upper++; 1802 } 1803 } 1804 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1805 nzk += nlnk; 1806 1807 /* update lnk by computing fill-in for each pivot row to be merged in */ 1808 prow = jl[k]; /* 1st pivot row */ 1809 1810 while (prow < k){ 1811 nextprow = jl[prow]; 1812 /* merge prow into k-th row */ 1813 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1814 jmax = ui[prow+1]; 1815 ncols = jmax-jmin; 1816 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1817 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1818 nzk += nlnk; 1819 1820 /* update il and jl for prow */ 1821 if (jmin < jmax){ 1822 il[prow] = jmin; 1823 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1824 } 1825 prow = nextprow; 1826 } 1827 1828 /* if free space is not available, make more free space */ 1829 if (current_space->local_remaining<nzk) { 1830 i = am - k + 1; /* num of unfactored rows */ 1831 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1832 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1833 reallocs++; 1834 } 1835 1836 /* copy data into free space, then initialize lnk */ 1837 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1838 1839 /* add the k-th row into il and jl */ 1840 if (nzk-1 > 0){ 1841 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1842 jl[k] = jl[i]; jl[i] = k; 1843 il[k] = ui[k] + 1; 1844 } 1845 ui_ptr[k] = current_space->array; 1846 current_space->array += nzk; 1847 current_space->local_used += nzk; 1848 current_space->local_remaining -= nzk; 1849 1850 ui[k+1] = ui[k] + nzk; 1851 } 1852 1853 #if defined(PETSC_USE_INFO) 1854 if (ai[am] != 0) { 1855 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1856 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1857 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1858 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1859 } else { 1860 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1861 } 1862 #endif 1863 1864 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1865 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1866 ierr = PetscFree(jl);CHKERRQ(ierr); 1867 1868 /* destroy list of free space and other temporary array(s) */ 1869 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1870 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1871 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1872 1873 /* put together the new matrix in MATSEQSBAIJ format */ 1874 1875 b = (Mat_SeqSBAIJ*)(fact)->data; 1876 b->singlemalloc = PETSC_FALSE; 1877 b->free_a = PETSC_TRUE; 1878 b->free_ij = PETSC_TRUE; 1879 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1880 b->j = uj; 1881 b->i = ui; 1882 b->diag = 0; 1883 b->ilen = 0; 1884 b->imax = 0; 1885 b->row = perm; 1886 b->col = perm; 1887 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1888 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1889 b->icol = iperm; 1890 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1891 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1892 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1893 b->maxnz = b->nz = ui[am]; 1894 1895 (fact)->info.factor_mallocs = reallocs; 1896 (fact)->info.fill_ratio_given = fill; 1897 if (ai[am] != 0) { 1898 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1899 } else { 1900 (fact)->info.fill_ratio_needed = 0.0; 1901 } 1902 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1903 PetscFunctionReturn(0); 1904 } 1905