1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap->n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = MAT_FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 EXTERN_C_BEGIN 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 261 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 262 { 263 PetscFunctionBegin; 264 *flg = PETSC_TRUE; 265 PetscFunctionReturn(0); 266 } 267 EXTERN_C_END 268 269 EXTERN_C_BEGIN 270 #undef __FUNCT__ 271 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 272 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 273 { 274 PetscInt n = A->rmap->n; 275 PetscErrorCode ierr; 276 277 PetscFunctionBegin; 278 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 279 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 280 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 281 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 282 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 283 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 284 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 285 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 286 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 287 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 288 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 289 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 290 (*B)->factor = ftype; 291 PetscFunctionReturn(0); 292 } 293 EXTERN_C_END 294 295 #undef __FUNCT__ 296 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 297 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 298 { 299 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 300 IS isicol; 301 PetscErrorCode ierr; 302 PetscInt *r,*ic,i,n=A->rmap->n,*ai=a->i,*aj=a->j; 303 PetscInt *bi,*bj,*ajtmp; 304 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 305 PetscReal f; 306 PetscInt nlnk,*lnk,k,**bi_ptr; 307 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 308 PetscBT lnkbt; 309 310 PetscFunctionBegin; 311 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 312 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 315 316 /* get new row pointers */ 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 318 bi[0] = 0; 319 320 /* bdiag is location of diagonal in factor */ 321 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 322 bdiag[0] = 0; 323 324 /* linked list for storing column indices of the active row */ 325 nlnk = n + 1; 326 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 328 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 329 330 /* initial FreeSpace size is f*(ai[n]+1) */ 331 f = info->fill; 332 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 333 current_space = free_space; 334 335 for (i=0; i<n; i++) { 336 /* copy previous fill into linked list */ 337 nzi = 0; 338 nnz = ai[r[i]+1] - ai[r[i]]; 339 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 340 ajtmp = aj + ai[r[i]]; 341 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 342 nzi += nlnk; 343 344 /* add pivot rows into linked list */ 345 row = lnk[n]; 346 while (row < i) { 347 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 348 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 349 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 350 nzi += nlnk; 351 row = lnk[row]; 352 } 353 bi[i+1] = bi[i] + nzi; 354 im[i] = nzi; 355 356 /* mark bdiag */ 357 nzbd = 0; 358 nnz = nzi; 359 k = lnk[n]; 360 while (nnz-- && k < i){ 361 nzbd++; 362 k = lnk[k]; 363 } 364 bdiag[i] = bi[i] + nzbd; 365 366 /* if free space is not available, make more free space */ 367 if (current_space->local_remaining<nzi) { 368 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 369 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 370 reallocs++; 371 } 372 373 /* copy data into free space, then initialize lnk */ 374 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 375 bi_ptr[i] = current_space->array; 376 current_space->array += nzi; 377 current_space->local_used += nzi; 378 current_space->local_remaining -= nzi; 379 } 380 #if defined(PETSC_USE_INFO) 381 if (ai[n] != 0) { 382 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 386 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 387 } else { 388 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 389 } 390 #endif 391 392 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 393 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 394 395 /* destroy list of free space and other temporary array(s) */ 396 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 397 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 398 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 399 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 400 401 /* put together the new matrix */ 402 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 403 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 404 b = (Mat_SeqAIJ*)(*B)->data; 405 b->free_a = PETSC_TRUE; 406 b->free_ij = PETSC_TRUE; 407 b->singlemalloc = PETSC_FALSE; 408 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 409 b->j = bj; 410 b->i = bi; 411 b->diag = bdiag; 412 b->ilen = 0; 413 b->imax = 0; 414 b->row = isrow; 415 b->col = iscol; 416 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 417 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 418 b->icol = isicol; 419 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 420 421 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 422 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 423 b->maxnz = b->nz = bi[n] ; 424 425 (*B)->factor = MAT_FACTOR_LU; 426 (*B)->info.factor_mallocs = reallocs; 427 (*B)->info.fill_ratio_given = f; 428 429 if (ai[n] != 0) { 430 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 431 } else { 432 (*B)->info.fill_ratio_needed = 0.0; 433 } 434 (*B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 (*B)->ops->solve = MatSolve_SeqAIJ; 436 (*B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 437 /* switch to inodes if appropriate */ 438 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscTruth flg; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 467 468 /* ----------------------------------------------------------- */ 469 #undef __FUNCT__ 470 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 471 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 472 { 473 Mat C=*B; 474 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 475 IS isrow = b->row,isicol = b->icol; 476 PetscErrorCode ierr; 477 PetscInt *r,*ic,i,j,n=A->rmap->n,*bi=b->i,*bj=b->j; 478 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 479 PetscInt *diag_offset = b->diag,diag,*pj; 480 PetscScalar *rtmp,*pc,multiplier,*rtmps; 481 MatScalar *v,*pv; 482 PetscScalar d; 483 PetscReal rs; 484 LUShift_Ctx sctx; 485 PetscInt newshift,*ddiag; 486 487 PetscFunctionBegin; 488 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 489 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 490 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 491 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 492 rtmps = rtmp; ics = ic; 493 494 sctx.shift_top = 0; 495 sctx.nshift_max = 0; 496 sctx.shift_lo = 0; 497 sctx.shift_hi = 0; 498 499 /* if both shift schemes are chosen by user, only use info->shiftpd */ 500 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 501 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 502 PetscInt *aai = a->i; 503 ddiag = a->diag; 504 sctx.shift_top = 0; 505 for (i=0; i<n; i++) { 506 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 507 d = (a->a)[ddiag[i]]; 508 rs = -PetscAbsScalar(d) - PetscRealPart(d); 509 v = a->a+aai[i]; 510 nz = aai[i+1] - aai[i]; 511 for (j=0; j<nz; j++) 512 rs += PetscAbsScalar(v[j]); 513 if (rs>sctx.shift_top) sctx.shift_top = rs; 514 } 515 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 516 sctx.shift_top *= 1.1; 517 sctx.nshift_max = 5; 518 sctx.shift_lo = 0.; 519 sctx.shift_hi = 1.; 520 } 521 522 sctx.shift_amount = 0; 523 sctx.nshift = 0; 524 do { 525 sctx.lushift = PETSC_FALSE; 526 for (i=0; i<n; i++){ 527 nz = bi[i+1] - bi[i]; 528 bjtmp = bj + bi[i]; 529 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 530 531 /* load in initial (unfactored row) */ 532 nz = a->i[r[i]+1] - a->i[r[i]]; 533 ajtmp = a->j + a->i[r[i]]; 534 v = a->a + a->i[r[i]]; 535 for (j=0; j<nz; j++) { 536 rtmp[ics[ajtmp[j]]] = v[j]; 537 } 538 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539 540 row = *bjtmp++; 541 while (row < i) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + diag_offset[row]; 545 pj = b->j + diag_offset[row] + 1; 546 multiplier = *pc / *pv++; 547 *pc = multiplier; 548 nz = bi[row+1] - diag_offset[row] - 1; 549 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 550 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 551 } 552 row = *bjtmp++; 553 } 554 /* finished row so stick it into b->a */ 555 pv = b->a + bi[i] ; 556 pj = b->j + bi[i] ; 557 nz = bi[i+1] - bi[i]; 558 diag = diag_offset[i] - bi[i]; 559 rs = 0.0; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmps[pj[j]]; 562 if (j != diag) rs += PetscAbsScalar(pv[j]); 563 } 564 565 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 566 sctx.rs = rs; 567 sctx.pv = pv[diag]; 568 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 569 if (newshift == 1) break; 570 } 571 572 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 573 /* 574 * if no shift in this attempt & shifting & started shifting & can refine, 575 * then try lower shift 576 */ 577 sctx.shift_hi = info->shift_fraction; 578 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 579 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 580 sctx.lushift = PETSC_TRUE; 581 sctx.nshift++; 582 } 583 } while (sctx.lushift); 584 585 /* invert diagonal entries for simplier triangular solves */ 586 for (i=0; i<n; i++) { 587 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 588 } 589 ierr = PetscFree(rtmp);CHKERRQ(ierr); 590 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 592 if (b->inode.use) { 593 C->ops->solve = MatSolve_Inode; 594 } else { 595 C->ops->solve = MatSolve_SeqAIJ; 596 } 597 C->ops->solveadd = MatSolveAdd_SeqAIJ; 598 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 599 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 600 C->assembled = PETSC_TRUE; 601 C->preallocated = PETSC_TRUE; 602 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 603 if (sctx.nshift){ 604 if (info->shiftnz) { 605 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 606 } else if (info->shiftpd) { 607 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 608 } 609 } 610 PetscFunctionReturn(0); 611 } 612 613 /* 614 This routine implements inplace ILU(0) with row or/and column permutations. 615 Input: 616 A - original matrix 617 Output; 618 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 619 a->j (col index) is permuted by the inverse of colperm, then sorted 620 a->a reordered accordingly with a->j 621 a->diag (ptr to diagonal elements) is updated. 622 */ 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 626 { 627 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 628 IS isrow = a->row,isicol = a->icol; 629 PetscErrorCode ierr; 630 PetscInt *r,*ic,i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 631 PetscInt *ajtmp,nz,row,*ics; 632 PetscInt *diag = a->diag,nbdiag,*pj; 633 PetscScalar *rtmp,*pc,multiplier,d; 634 MatScalar *v,*pv; 635 PetscReal rs; 636 LUShift_Ctx sctx; 637 PetscInt newshift; 638 639 PetscFunctionBegin; 640 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 641 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 642 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 643 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 644 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 645 ics = ic; 646 647 sctx.shift_top = 0; 648 sctx.nshift_max = 0; 649 sctx.shift_lo = 0; 650 sctx.shift_hi = 0; 651 652 /* if both shift schemes are chosen by user, only use info->shiftpd */ 653 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 654 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 655 sctx.shift_top = 0; 656 for (i=0; i<n; i++) { 657 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 658 d = (a->a)[diag[i]]; 659 rs = -PetscAbsScalar(d) - PetscRealPart(d); 660 v = a->a+ai[i]; 661 nz = ai[i+1] - ai[i]; 662 for (j=0; j<nz; j++) 663 rs += PetscAbsScalar(v[j]); 664 if (rs>sctx.shift_top) sctx.shift_top = rs; 665 } 666 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 667 sctx.shift_top *= 1.1; 668 sctx.nshift_max = 5; 669 sctx.shift_lo = 0.; 670 sctx.shift_hi = 1.; 671 } 672 673 sctx.shift_amount = 0; 674 sctx.nshift = 0; 675 do { 676 sctx.lushift = PETSC_FALSE; 677 for (i=0; i<n; i++){ 678 /* load in initial unfactored row */ 679 nz = ai[r[i]+1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = a->a + ai[r[i]]; 682 /* sort permuted ajtmp and values v accordingly */ 683 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 684 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 685 686 diag[r[i]] = ai[r[i]]; 687 for (j=0; j<nz; j++) { 688 rtmp[ajtmp[j]] = v[j]; 689 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 690 } 691 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 692 693 row = *ajtmp++; 694 while (row < i) { 695 pc = rtmp + row; 696 if (*pc != 0.0) { 697 pv = a->a + diag[r[row]]; 698 pj = aj + diag[r[row]] + 1; 699 700 multiplier = *pc / *pv++; 701 *pc = multiplier; 702 nz = ai[r[row]+1] - diag[r[row]] - 1; 703 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 704 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 705 } 706 row = *ajtmp++; 707 } 708 /* finished row so overwrite it onto a->a */ 709 pv = a->a + ai[r[i]] ; 710 pj = aj + ai[r[i]] ; 711 nz = ai[r[i]+1] - ai[r[i]]; 712 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 713 714 rs = 0.0; 715 for (j=0; j<nz; j++) { 716 pv[j] = rtmp[pj[j]]; 717 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 718 } 719 720 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 721 sctx.rs = rs; 722 sctx.pv = pv[nbdiag]; 723 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 724 if (newshift == 1) break; 725 } 726 727 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 728 /* 729 * if no shift in this attempt & shifting & started shifting & can refine, 730 * then try lower shift 731 */ 732 sctx.shift_hi = info->shift_fraction; 733 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 734 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 735 sctx.lushift = PETSC_TRUE; 736 sctx.nshift++; 737 } 738 } while (sctx.lushift); 739 740 /* invert diagonal entries for simplier triangular solves */ 741 for (i=0; i<n; i++) { 742 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 743 } 744 745 ierr = PetscFree(rtmp);CHKERRQ(ierr); 746 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 747 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 748 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 749 A->ops->solveadd = MatSolveAdd_SeqAIJ; 750 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 751 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 752 A->assembled = PETSC_TRUE; 753 A->preallocated = PETSC_TRUE; 754 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 755 if (sctx.nshift){ 756 if (info->shiftnz) { 757 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 758 } else if (info->shiftpd) { 759 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 760 } 761 } 762 PetscFunctionReturn(0); 763 } 764 765 /* ----------------------------------------------------------- */ 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatLUFactor_SeqAIJ" 768 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 769 { 770 PetscErrorCode ierr; 771 Mat C; 772 773 PetscFunctionBegin; 774 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 775 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 776 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 777 A->ops->solve = C->ops->solve; 778 A->ops->solvetranspose = C->ops->solvetranspose; 779 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 780 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 781 PetscFunctionReturn(0); 782 } 783 /* ----------------------------------------------------------- */ 784 #undef __FUNCT__ 785 #define __FUNCT__ "MatSolve_SeqAIJ" 786 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 787 { 788 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 789 IS iscol = a->col,isrow = a->row; 790 PetscErrorCode ierr; 791 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 792 PetscInt nz,*rout,*cout; 793 PetscScalar *x,*tmp,*tmps,sum; 794 const PetscScalar *b; 795 const MatScalar *aa = a->a,*v; 796 797 PetscFunctionBegin; 798 if (!n) PetscFunctionReturn(0); 799 800 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 802 tmp = a->solve_work; 803 804 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 805 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 806 807 /* forward solve the lower triangular */ 808 tmp[0] = b[*r++]; 809 tmps = tmp; 810 for (i=1; i<n; i++) { 811 v = aa + ai[i] ; 812 vi = aj + ai[i] ; 813 nz = a->diag[i] - ai[i]; 814 sum = b[*r++]; 815 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 816 tmp[i] = sum; 817 } 818 819 /* backward solve the upper triangular */ 820 for (i=n-1; i>=0; i--){ 821 v = aa + a->diag[i] + 1; 822 vi = aj + a->diag[i] + 1; 823 nz = ai[i+1] - a->diag[i] - 1; 824 sum = tmp[i]; 825 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 826 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 827 } 828 829 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 830 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 831 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 832 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 833 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 834 PetscFunctionReturn(0); 835 } 836 837 #undef __FUNCT__ 838 #define __FUNCT__ "MatMatSolve_SeqAIJ" 839 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 840 { 841 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 842 IS iscol = a->col,isrow = a->row; 843 PetscErrorCode ierr; 844 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 845 PetscInt nz,*rout,*cout,neq; 846 PetscScalar *x,*b,*tmp,*tmps,sum; 847 const MatScalar *aa = a->a,*v; 848 PetscTruth bisdense,xisdense; 849 850 PetscFunctionBegin; 851 if (!n) PetscFunctionReturn(0); 852 853 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 854 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 855 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 856 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 857 858 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 859 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 860 861 tmp = a->solve_work; 862 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 863 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 864 865 for (neq=0; neq<B->cmap->n; neq++){ 866 /* forward solve the lower triangular */ 867 tmp[0] = b[r[0]]; 868 tmps = tmp; 869 for (i=1; i<n; i++) { 870 v = aa + ai[i] ; 871 vi = aj + ai[i] ; 872 nz = a->diag[i] - ai[i]; 873 sum = b[r[i]]; 874 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 875 tmp[i] = sum; 876 } 877 /* backward solve the upper triangular */ 878 for (i=n-1; i>=0; i--){ 879 v = aa + a->diag[i] + 1; 880 vi = aj + a->diag[i] + 1; 881 nz = ai[i+1] - a->diag[i] - 1; 882 sum = tmp[i]; 883 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 884 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 885 } 886 887 b += n; 888 x += n; 889 } 890 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 891 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 892 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 893 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 894 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 900 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 901 { 902 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 903 IS iscol = a->col,isrow = a->row; 904 PetscErrorCode ierr; 905 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 906 PetscInt nz,*rout,*cout,row; 907 PetscScalar *x,*b,*tmp,*tmps,sum; 908 const MatScalar *aa = a->a,*v; 909 910 PetscFunctionBegin; 911 if (!n) PetscFunctionReturn(0); 912 913 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 914 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 915 tmp = a->solve_work; 916 917 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 918 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 919 920 /* forward solve the lower triangular */ 921 tmp[0] = b[*r++]; 922 tmps = tmp; 923 for (row=1; row<n; row++) { 924 i = rout[row]; /* permuted row */ 925 v = aa + ai[i] ; 926 vi = aj + ai[i] ; 927 nz = a->diag[i] - ai[i]; 928 sum = b[*r++]; 929 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 930 tmp[row] = sum; 931 } 932 933 /* backward solve the upper triangular */ 934 for (row=n-1; row>=0; row--){ 935 i = rout[row]; /* permuted row */ 936 v = aa + a->diag[i] + 1; 937 vi = aj + a->diag[i] + 1; 938 nz = ai[i+1] - a->diag[i] - 1; 939 sum = tmp[row]; 940 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 941 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 942 } 943 944 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 945 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 946 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 947 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 948 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 /* ----------------------------------------------------------- */ 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 955 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 956 { 957 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 958 PetscErrorCode ierr; 959 PetscInt n = A->rmap->n; 960 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 961 PetscScalar *x; 962 const PetscScalar *b; 963 const MatScalar *aa = a->a; 964 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 965 PetscInt adiag_i,i,nz,ai_i; 966 const MatScalar *v; 967 PetscScalar sum; 968 #endif 969 970 PetscFunctionBegin; 971 if (!n) PetscFunctionReturn(0); 972 973 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 974 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 975 976 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 977 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 978 #else 979 /* forward solve the lower triangular */ 980 x[0] = b[0]; 981 for (i=1; i<n; i++) { 982 ai_i = ai[i]; 983 v = aa + ai_i; 984 vi = aj + ai_i; 985 nz = adiag[i] - ai_i; 986 sum = b[i]; 987 while (nz--) sum -= *v++ * x[*vi++]; 988 x[i] = sum; 989 } 990 991 /* backward solve the upper triangular */ 992 for (i=n-1; i>=0; i--){ 993 adiag_i = adiag[i]; 994 v = aa + adiag_i + 1; 995 vi = aj + adiag_i + 1; 996 nz = ai[i+1] - adiag_i - 1; 997 sum = x[i]; 998 while (nz--) sum -= *v++ * x[*vi++]; 999 x[i] = sum*aa[adiag_i]; 1000 } 1001 #endif 1002 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1003 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1004 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 #undef __FUNCT__ 1009 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1010 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1011 { 1012 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1013 IS iscol = a->col,isrow = a->row; 1014 PetscErrorCode ierr; 1015 PetscInt *r,*c,i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1016 PetscInt nz,*rout,*cout; 1017 PetscScalar *x,*b,*tmp,sum; 1018 const MatScalar *aa = a->a,*v; 1019 1020 PetscFunctionBegin; 1021 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1022 1023 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1024 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1025 tmp = a->solve_work; 1026 1027 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1028 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1029 1030 /* forward solve the lower triangular */ 1031 tmp[0] = b[*r++]; 1032 for (i=1; i<n; i++) { 1033 v = aa + ai[i] ; 1034 vi = aj + ai[i] ; 1035 nz = a->diag[i] - ai[i]; 1036 sum = b[*r++]; 1037 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1038 tmp[i] = sum; 1039 } 1040 1041 /* backward solve the upper triangular */ 1042 for (i=n-1; i>=0; i--){ 1043 v = aa + a->diag[i] + 1; 1044 vi = aj + a->diag[i] + 1; 1045 nz = ai[i+1] - a->diag[i] - 1; 1046 sum = tmp[i]; 1047 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1048 tmp[i] = sum*aa[a->diag[i]]; 1049 x[*c--] += tmp[i]; 1050 } 1051 1052 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1053 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1054 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1056 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1057 1058 PetscFunctionReturn(0); 1059 } 1060 /* -------------------------------------------------------------------*/ 1061 #undef __FUNCT__ 1062 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1063 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1064 { 1065 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1066 IS iscol = a->col,isrow = a->row; 1067 PetscErrorCode ierr; 1068 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1069 PetscInt nz,*rout,*cout,*diag = a->diag; 1070 PetscScalar *x,*b,*tmp,s1; 1071 const MatScalar *aa = a->a,*v; 1072 1073 PetscFunctionBegin; 1074 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1075 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1076 tmp = a->solve_work; 1077 1078 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1079 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1080 1081 /* copy the b into temp work space according to permutation */ 1082 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1083 1084 /* forward solve the U^T */ 1085 for (i=0; i<n; i++) { 1086 v = aa + diag[i] ; 1087 vi = aj + diag[i] + 1; 1088 nz = ai[i+1] - diag[i] - 1; 1089 s1 = tmp[i]; 1090 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1091 while (nz--) { 1092 tmp[*vi++ ] -= (*v++)*s1; 1093 } 1094 tmp[i] = s1; 1095 } 1096 1097 /* backward solve the L^T */ 1098 for (i=n-1; i>=0; i--){ 1099 v = aa + diag[i] - 1 ; 1100 vi = aj + diag[i] - 1 ; 1101 nz = diag[i] - ai[i]; 1102 s1 = tmp[i]; 1103 while (nz--) { 1104 tmp[*vi-- ] -= (*v--)*s1; 1105 } 1106 } 1107 1108 /* copy tmp into x according to permutation */ 1109 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1110 1111 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 1116 ierr = PetscLogFlops(2*a->nz-A->cmap->n);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1122 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1123 { 1124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1125 IS iscol = a->col,isrow = a->row; 1126 PetscErrorCode ierr; 1127 PetscInt *r,*c,i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1128 PetscInt nz,*rout,*cout,*diag = a->diag; 1129 PetscScalar *x,*b,*tmp; 1130 const MatScalar *aa = a->a,*v; 1131 1132 PetscFunctionBegin; 1133 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1134 1135 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1136 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1137 tmp = a->solve_work; 1138 1139 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1140 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1141 1142 /* copy the b into temp work space according to permutation */ 1143 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1144 1145 /* forward solve the U^T */ 1146 for (i=0; i<n; i++) { 1147 v = aa + diag[i] ; 1148 vi = aj + diag[i] + 1; 1149 nz = ai[i+1] - diag[i] - 1; 1150 tmp[i] *= *v++; 1151 while (nz--) { 1152 tmp[*vi++ ] -= (*v++)*tmp[i]; 1153 } 1154 } 1155 1156 /* backward solve the L^T */ 1157 for (i=n-1; i>=0; i--){ 1158 v = aa + diag[i] - 1 ; 1159 vi = aj + diag[i] - 1 ; 1160 nz = diag[i] - ai[i]; 1161 while (nz--) { 1162 tmp[*vi-- ] -= (*v--)*tmp[i]; 1163 } 1164 } 1165 1166 /* copy tmp into x according to permutation */ 1167 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1168 1169 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1170 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1171 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173 1174 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 /* ----------------------------------------------------------------*/ 1178 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1179 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*); 1180 1181 #undef __FUNCT__ 1182 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1183 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1184 { 1185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1186 IS isicol; 1187 PetscErrorCode ierr; 1188 PetscInt *r,*ic,n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1189 PetscInt *bi,*cols,nnz,*cols_lvl; 1190 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1191 PetscInt i,levels,diagonal_fill; 1192 PetscTruth col_identity,row_identity; 1193 PetscReal f; 1194 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1195 PetscBT lnkbt; 1196 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1197 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1198 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1199 PetscTruth missing; 1200 1201 PetscFunctionBegin; 1202 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1203 f = info->fill; 1204 levels = (PetscInt)info->levels; 1205 diagonal_fill = (PetscInt)info->diagonal_fill; 1206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1207 1208 /* special case that simply copies fill pattern */ 1209 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1210 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1211 if (!levels && row_identity && col_identity) { 1212 ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1213 (*fact)->factor = MAT_FACTOR_LU; 1214 (*fact)->info.factor_mallocs = 0; 1215 (*fact)->info.fill_ratio_given = info->fill; 1216 (*fact)->info.fill_ratio_needed = 1.0; 1217 b = (Mat_SeqAIJ*)(*fact)->data; 1218 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1219 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1220 b->row = isrow; 1221 b->col = iscol; 1222 b->icol = isicol; 1223 ierr = PetscMalloc(((*fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1224 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1225 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1226 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1227 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1228 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1229 PetscFunctionReturn(0); 1230 } 1231 1232 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1233 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1234 1235 /* get new row pointers */ 1236 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1237 bi[0] = 0; 1238 /* bdiag is location of diagonal in factor */ 1239 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1240 bdiag[0] = 0; 1241 1242 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1243 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1244 1245 /* create a linked list for storing column indices of the active row */ 1246 nlnk = n + 1; 1247 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1248 1249 /* initial FreeSpace size is f*(ai[n]+1) */ 1250 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1251 current_space = free_space; 1252 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1253 current_space_lvl = free_space_lvl; 1254 1255 for (i=0; i<n; i++) { 1256 nzi = 0; 1257 /* copy current row into linked list */ 1258 nnz = ai[r[i]+1] - ai[r[i]]; 1259 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1260 cols = aj + ai[r[i]]; 1261 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1262 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1263 nzi += nlnk; 1264 1265 /* make sure diagonal entry is included */ 1266 if (diagonal_fill && lnk[i] == -1) { 1267 fm = n; 1268 while (lnk[fm] < i) fm = lnk[fm]; 1269 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1270 lnk[fm] = i; 1271 lnk_lvl[i] = 0; 1272 nzi++; dcount++; 1273 } 1274 1275 /* add pivot rows into the active row */ 1276 nzbd = 0; 1277 prow = lnk[n]; 1278 while (prow < i) { 1279 nnz = bdiag[prow]; 1280 cols = bj_ptr[prow] + nnz + 1; 1281 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1282 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1283 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1284 nzi += nlnk; 1285 prow = lnk[prow]; 1286 nzbd++; 1287 } 1288 bdiag[i] = nzbd; 1289 bi[i+1] = bi[i] + nzi; 1290 1291 /* if free space is not available, make more free space */ 1292 if (current_space->local_remaining<nzi) { 1293 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1294 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1295 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1296 reallocs++; 1297 } 1298 1299 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1300 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1301 bj_ptr[i] = current_space->array; 1302 bjlvl_ptr[i] = current_space_lvl->array; 1303 1304 /* make sure the active row i has diagonal entry */ 1305 if (*(bj_ptr[i]+bdiag[i]) != i) { 1306 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1307 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1308 } 1309 1310 current_space->array += nzi; 1311 current_space->local_used += nzi; 1312 current_space->local_remaining -= nzi; 1313 current_space_lvl->array += nzi; 1314 current_space_lvl->local_used += nzi; 1315 current_space_lvl->local_remaining -= nzi; 1316 } 1317 1318 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1319 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1320 1321 /* destroy list of free space and other temporary arrays */ 1322 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1323 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1324 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1325 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1326 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1327 1328 #if defined(PETSC_USE_INFO) 1329 { 1330 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1331 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1332 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1333 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1334 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1335 if (diagonal_fill) { 1336 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1337 } 1338 } 1339 #endif 1340 1341 /* put together the new matrix */ 1342 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1343 b = (Mat_SeqAIJ*)(*fact)->data; 1344 b->free_a = PETSC_TRUE; 1345 b->free_ij = PETSC_TRUE; 1346 b->singlemalloc = PETSC_FALSE; 1347 len = (bi[n] )*sizeof(PetscScalar); 1348 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1349 b->j = bj; 1350 b->i = bi; 1351 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1352 b->diag = bdiag; 1353 b->ilen = 0; 1354 b->imax = 0; 1355 b->row = isrow; 1356 b->col = iscol; 1357 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1358 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1359 b->icol = isicol; 1360 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1361 /* In b structure: Free imax, ilen, old a, old j. 1362 Allocate bdiag, solve_work, new a, new j */ 1363 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1364 b->maxnz = b->nz = bi[n] ; 1365 (*fact)->factor = MAT_FACTOR_LU; 1366 (*fact)->info.factor_mallocs = reallocs; 1367 (*fact)->info.fill_ratio_given = f; 1368 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1369 (*fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1370 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #include "src/mat/impls/sbaij/seq/sbaij.h" 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1377 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1378 { 1379 Mat C = *B; 1380 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1381 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1382 IS ip=b->row,iip = b->icol; 1383 PetscErrorCode ierr; 1384 PetscInt *rip,*riip,i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1385 PetscInt *ai=a->i,*aj=a->j; 1386 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1387 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1388 PetscReal zeropivot,rs,shiftnz; 1389 PetscReal shiftpd; 1390 ChShift_Ctx sctx; 1391 PetscInt newshift; 1392 PetscTruth perm_identity; 1393 1394 PetscFunctionBegin; 1395 1396 shiftnz = info->shiftnz; 1397 shiftpd = info->shiftpd; 1398 zeropivot = info->zeropivot; 1399 1400 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1401 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1402 1403 /* initialization */ 1404 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1405 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1406 jl = il + mbs; 1407 rtmp = (MatScalar*)(jl + mbs); 1408 1409 sctx.shift_amount = 0; 1410 sctx.nshift = 0; 1411 do { 1412 sctx.chshift = PETSC_FALSE; 1413 for (i=0; i<mbs; i++) { 1414 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1415 } 1416 1417 for (k = 0; k<mbs; k++){ 1418 bval = ba + bi[k]; 1419 /* initialize k-th row by the perm[k]-th row of A */ 1420 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1421 for (j = jmin; j < jmax; j++){ 1422 col = riip[aj[j]]; 1423 if (col >= k){ /* only take upper triangular entry */ 1424 rtmp[col] = aa[j]; 1425 *bval++ = 0.0; /* for in-place factorization */ 1426 } 1427 } 1428 /* shift the diagonal of the matrix */ 1429 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1430 1431 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1432 dk = rtmp[k]; 1433 i = jl[k]; /* first row to be added to k_th row */ 1434 1435 while (i < k){ 1436 nexti = jl[i]; /* next row to be added to k_th row */ 1437 1438 /* compute multiplier, update diag(k) and U(i,k) */ 1439 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1440 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1441 dk += uikdi*ba[ili]; 1442 ba[ili] = uikdi; /* -U(i,k) */ 1443 1444 /* add multiple of row i to k-th row */ 1445 jmin = ili + 1; jmax = bi[i+1]; 1446 if (jmin < jmax){ 1447 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1448 /* update il and jl for row i */ 1449 il[i] = jmin; 1450 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1451 } 1452 i = nexti; 1453 } 1454 1455 /* shift the diagonals when zero pivot is detected */ 1456 /* compute rs=sum of abs(off-diagonal) */ 1457 rs = 0.0; 1458 jmin = bi[k]+1; 1459 nz = bi[k+1] - jmin; 1460 bcol = bj + jmin; 1461 while (nz--){ 1462 rs += PetscAbsScalar(rtmp[*bcol]); 1463 bcol++; 1464 } 1465 1466 sctx.rs = rs; 1467 sctx.pv = dk; 1468 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1469 1470 if (newshift == 1) { 1471 if (!sctx.shift_amount) { 1472 sctx.shift_amount = 1e-5; 1473 } 1474 break; 1475 } 1476 1477 /* copy data into U(k,:) */ 1478 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1479 jmin = bi[k]+1; jmax = bi[k+1]; 1480 if (jmin < jmax) { 1481 for (j=jmin; j<jmax; j++){ 1482 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1483 } 1484 /* add the k-th row into il and jl */ 1485 il[k] = jmin; 1486 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1487 } 1488 } 1489 } while (sctx.chshift); 1490 ierr = PetscFree(il);CHKERRQ(ierr); 1491 1492 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1493 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1494 1495 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1496 if (perm_identity){ 1497 (*B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1498 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1499 (*B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1500 (*B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1501 } else { 1502 (*B)->ops->solve = MatSolve_SeqSBAIJ_1; 1503 (*B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1504 (*B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1505 (*B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1506 } 1507 1508 C->assembled = PETSC_TRUE; 1509 C->preallocated = PETSC_TRUE; 1510 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1511 if (sctx.nshift){ 1512 if (shiftnz) { 1513 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1514 } else if (shiftpd) { 1515 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1516 } 1517 } 1518 PetscFunctionReturn(0); 1519 } 1520 1521 #undef __FUNCT__ 1522 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1523 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1524 { 1525 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1526 Mat_SeqSBAIJ *b; 1527 PetscErrorCode ierr; 1528 PetscTruth perm_identity,missing; 1529 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1530 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1531 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1532 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1533 PetscReal fill=info->fill,levels=info->levels; 1534 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1535 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1536 PetscBT lnkbt; 1537 IS iperm; 1538 1539 PetscFunctionBegin; 1540 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1541 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1542 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1543 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1544 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1545 1546 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1547 ui[0] = 0; 1548 1549 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1550 if (!levels && perm_identity) { 1551 1552 for (i=0; i<am; i++) { 1553 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1554 } 1555 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1556 cols = uj; 1557 for (i=0; i<am; i++) { 1558 aj = a->j + a->diag[i]; 1559 ncols = ui[i+1] - ui[i]; 1560 for (j=0; j<ncols; j++) *cols++ = *aj++; 1561 } 1562 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1563 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1564 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1565 1566 /* initialization */ 1567 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1568 1569 /* jl: linked list for storing indices of the pivot rows 1570 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1571 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1572 il = jl + am; 1573 uj_ptr = (PetscInt**)(il + am); 1574 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1575 for (i=0; i<am; i++){ 1576 jl[i] = am; il[i] = 0; 1577 } 1578 1579 /* create and initialize a linked list for storing column indices of the active row k */ 1580 nlnk = am + 1; 1581 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1582 1583 /* initial FreeSpace size is fill*(ai[am]+1) */ 1584 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1585 current_space = free_space; 1586 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1587 current_space_lvl = free_space_lvl; 1588 1589 for (k=0; k<am; k++){ /* for each active row k */ 1590 /* initialize lnk by the column indices of row rip[k] of A */ 1591 nzk = 0; 1592 ncols = ai[rip[k]+1] - ai[rip[k]]; 1593 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1594 ncols_upper = 0; 1595 for (j=0; j<ncols; j++){ 1596 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1597 if (riip[i] >= k){ /* only take upper triangular entry */ 1598 ajtmp[ncols_upper] = i; 1599 ncols_upper++; 1600 } 1601 } 1602 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1603 nzk += nlnk; 1604 1605 /* update lnk by computing fill-in for each pivot row to be merged in */ 1606 prow = jl[k]; /* 1st pivot row */ 1607 1608 while (prow < k){ 1609 nextprow = jl[prow]; 1610 1611 /* merge prow into k-th row */ 1612 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1613 jmax = ui[prow+1]; 1614 ncols = jmax-jmin; 1615 i = jmin - ui[prow]; 1616 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1617 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1618 j = *(uj - 1); 1619 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1620 nzk += nlnk; 1621 1622 /* update il and jl for prow */ 1623 if (jmin < jmax){ 1624 il[prow] = jmin; 1625 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1626 } 1627 prow = nextprow; 1628 } 1629 1630 /* if free space is not available, make more free space */ 1631 if (current_space->local_remaining<nzk) { 1632 i = am - k + 1; /* num of unfactored rows */ 1633 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1634 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1635 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1636 reallocs++; 1637 } 1638 1639 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1640 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1641 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1642 1643 /* add the k-th row into il and jl */ 1644 if (nzk > 1){ 1645 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1646 jl[k] = jl[i]; jl[i] = k; 1647 il[k] = ui[k] + 1; 1648 } 1649 uj_ptr[k] = current_space->array; 1650 uj_lvl_ptr[k] = current_space_lvl->array; 1651 1652 current_space->array += nzk; 1653 current_space->local_used += nzk; 1654 current_space->local_remaining -= nzk; 1655 1656 current_space_lvl->array += nzk; 1657 current_space_lvl->local_used += nzk; 1658 current_space_lvl->local_remaining -= nzk; 1659 1660 ui[k+1] = ui[k] + nzk; 1661 } 1662 1663 #if defined(PETSC_USE_INFO) 1664 if (ai[am] != 0) { 1665 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1666 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1667 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1668 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1669 } else { 1670 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1671 } 1672 #endif 1673 1674 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1675 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1676 ierr = PetscFree(jl);CHKERRQ(ierr); 1677 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1678 1679 /* destroy list of free space and other temporary array(s) */ 1680 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1681 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1682 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1683 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1684 1685 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1686 1687 /* put together the new matrix in MATSEQSBAIJ format */ 1688 1689 b = (Mat_SeqSBAIJ*)(*fact)->data; 1690 b->singlemalloc = PETSC_FALSE; 1691 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1692 b->j = uj; 1693 b->i = ui; 1694 b->diag = 0; 1695 b->ilen = 0; 1696 b->imax = 0; 1697 b->row = perm; 1698 b->col = perm; 1699 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1700 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1701 b->icol = iperm; 1702 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1703 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1704 ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1705 b->maxnz = b->nz = ui[am]; 1706 b->free_a = PETSC_TRUE; 1707 b->free_ij = PETSC_TRUE; 1708 1709 (*fact)->info.factor_mallocs = reallocs; 1710 (*fact)->info.fill_ratio_given = fill; 1711 if (ai[am] != 0) { 1712 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1713 } else { 1714 (*fact)->info.fill_ratio_needed = 0.0; 1715 } 1716 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1722 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1723 { 1724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1725 Mat_SeqSBAIJ *b; 1726 PetscErrorCode ierr; 1727 PetscTruth perm_identity; 1728 PetscReal fill = info->fill; 1729 PetscInt *rip,*riip,i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1730 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1731 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1732 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1733 PetscBT lnkbt; 1734 IS iperm; 1735 1736 PetscFunctionBegin; 1737 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1738 /* check whether perm is the identity mapping */ 1739 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1740 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1741 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1742 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1743 1744 /* initialization */ 1745 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1746 ui[0] = 0; 1747 1748 /* jl: linked list for storing indices of the pivot rows 1749 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1750 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1751 il = jl + am; 1752 cols = il + am; 1753 ui_ptr = (PetscInt**)(cols + am); 1754 for (i=0; i<am; i++){ 1755 jl[i] = am; il[i] = 0; 1756 } 1757 1758 /* create and initialize a linked list for storing column indices of the active row k */ 1759 nlnk = am + 1; 1760 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1761 1762 /* initial FreeSpace size is fill*(ai[am]+1) */ 1763 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1764 current_space = free_space; 1765 1766 for (k=0; k<am; k++){ /* for each active row k */ 1767 /* initialize lnk by the column indices of row rip[k] of A */ 1768 nzk = 0; 1769 ncols = ai[rip[k]+1] - ai[rip[k]]; 1770 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1771 ncols_upper = 0; 1772 for (j=0; j<ncols; j++){ 1773 i = riip[*(aj + ai[rip[k]] + j)]; 1774 if (i >= k){ /* only take upper triangular entry */ 1775 cols[ncols_upper] = i; 1776 ncols_upper++; 1777 } 1778 } 1779 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1780 nzk += nlnk; 1781 1782 /* update lnk by computing fill-in for each pivot row to be merged in */ 1783 prow = jl[k]; /* 1st pivot row */ 1784 1785 while (prow < k){ 1786 nextprow = jl[prow]; 1787 /* merge prow into k-th row */ 1788 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1789 jmax = ui[prow+1]; 1790 ncols = jmax-jmin; 1791 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1792 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1793 nzk += nlnk; 1794 1795 /* update il and jl for prow */ 1796 if (jmin < jmax){ 1797 il[prow] = jmin; 1798 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1799 } 1800 prow = nextprow; 1801 } 1802 1803 /* if free space is not available, make more free space */ 1804 if (current_space->local_remaining<nzk) { 1805 i = am - k + 1; /* num of unfactored rows */ 1806 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1807 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1808 reallocs++; 1809 } 1810 1811 /* copy data into free space, then initialize lnk */ 1812 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1813 1814 /* add the k-th row into il and jl */ 1815 if (nzk-1 > 0){ 1816 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1817 jl[k] = jl[i]; jl[i] = k; 1818 il[k] = ui[k] + 1; 1819 } 1820 ui_ptr[k] = current_space->array; 1821 current_space->array += nzk; 1822 current_space->local_used += nzk; 1823 current_space->local_remaining -= nzk; 1824 1825 ui[k+1] = ui[k] + nzk; 1826 } 1827 1828 #if defined(PETSC_USE_INFO) 1829 if (ai[am] != 0) { 1830 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1831 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1832 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1833 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1834 } else { 1835 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1836 } 1837 #endif 1838 1839 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1840 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1841 ierr = PetscFree(jl);CHKERRQ(ierr); 1842 1843 /* destroy list of free space and other temporary array(s) */ 1844 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1845 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1846 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1847 1848 /* put together the new matrix in MATSEQSBAIJ format */ 1849 1850 b = (Mat_SeqSBAIJ*)(*fact)->data; 1851 b->singlemalloc = PETSC_FALSE; 1852 b->free_a = PETSC_TRUE; 1853 b->free_ij = PETSC_TRUE; 1854 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1855 b->j = uj; 1856 b->i = ui; 1857 b->diag = 0; 1858 b->ilen = 0; 1859 b->imax = 0; 1860 b->row = perm; 1861 b->col = perm; 1862 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1863 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1864 b->icol = iperm; 1865 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1866 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1867 ierr = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1868 b->maxnz = b->nz = ui[am]; 1869 1870 (*fact)->info.factor_mallocs = reallocs; 1871 (*fact)->info.fill_ratio_given = fill; 1872 if (ai[am] != 0) { 1873 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1874 } else { 1875 (*fact)->info.fill_ratio_needed = 0.0; 1876 } 1877 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1878 PetscFunctionReturn(0); 1879 } 1880