1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*pc,multiplier,*rtmps; 442 MatScalar *v,*pv; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 /* 569 This routine implements inplace ILU(0) with row or/and column permutations. 570 Input: 571 A - original matrix 572 Output; 573 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 574 a->j (col index) is permuted by the inverse of colperm, then sorted 575 a->a reordered accordingly with a->j 576 a->diag (ptr to diagonal elements) is updated. 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 581 { 582 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 583 IS isrow = a->row,isicol = a->icol; 584 PetscErrorCode ierr; 585 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 586 PetscInt *ajtmp,nz,row,*ics; 587 PetscInt *diag = a->diag,nbdiag,*pj; 588 PetscScalar *rtmp,*pc,multiplier,d; 589 MatScalar *v,*pv; 590 PetscReal rs; 591 LUShift_Ctx sctx; 592 PetscInt newshift; 593 594 PetscFunctionBegin; 595 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 596 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 597 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 598 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 599 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 600 ics = ic; 601 602 sctx.shift_top = 0; 603 sctx.nshift_max = 0; 604 sctx.shift_lo = 0; 605 sctx.shift_hi = 0; 606 607 /* if both shift schemes are chosen by user, only use info->shiftpd */ 608 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 609 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 610 sctx.shift_top = 0; 611 for (i=0; i<n; i++) { 612 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 613 d = (a->a)[diag[i]]; 614 rs = -PetscAbsScalar(d) - PetscRealPart(d); 615 v = a->a+ai[i]; 616 nz = ai[i+1] - ai[i]; 617 for (j=0; j<nz; j++) 618 rs += PetscAbsScalar(v[j]); 619 if (rs>sctx.shift_top) sctx.shift_top = rs; 620 } 621 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 622 sctx.shift_top *= 1.1; 623 sctx.nshift_max = 5; 624 sctx.shift_lo = 0.; 625 sctx.shift_hi = 1.; 626 } 627 628 sctx.shift_amount = 0; 629 sctx.nshift = 0; 630 do { 631 sctx.lushift = PETSC_FALSE; 632 for (i=0; i<n; i++){ 633 /* load in initial unfactored row */ 634 nz = ai[r[i]+1] - ai[r[i]]; 635 ajtmp = aj + ai[r[i]]; 636 v = a->a + ai[r[i]]; 637 /* sort permuted ajtmp and values v accordingly */ 638 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 639 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 640 641 diag[r[i]] = ai[r[i]]; 642 for (j=0; j<nz; j++) { 643 rtmp[ajtmp[j]] = v[j]; 644 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 645 } 646 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 647 648 row = *ajtmp++; 649 while (row < i) { 650 pc = rtmp + row; 651 if (*pc != 0.0) { 652 pv = a->a + diag[r[row]]; 653 pj = aj + diag[r[row]] + 1; 654 655 multiplier = *pc / *pv++; 656 *pc = multiplier; 657 nz = ai[r[row]+1] - diag[r[row]] - 1; 658 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 659 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 660 } 661 row = *ajtmp++; 662 } 663 /* finished row so overwrite it onto a->a */ 664 pv = a->a + ai[r[i]] ; 665 pj = aj + ai[r[i]] ; 666 nz = ai[r[i]+1] - ai[r[i]]; 667 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 668 669 rs = 0.0; 670 for (j=0; j<nz; j++) { 671 pv[j] = rtmp[pj[j]]; 672 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 673 } 674 675 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 676 sctx.rs = rs; 677 sctx.pv = pv[nbdiag]; 678 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 679 if (newshift == 1) break; 680 } 681 682 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 683 /* 684 * if no shift in this attempt & shifting & started shifting & can refine, 685 * then try lower shift 686 */ 687 sctx.shift_hi = info->shift_fraction; 688 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 689 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 690 sctx.lushift = PETSC_TRUE; 691 sctx.nshift++; 692 } 693 } while (sctx.lushift); 694 695 /* invert diagonal entries for simplier triangular solves */ 696 for (i=0; i<n; i++) { 697 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 698 } 699 700 ierr = PetscFree(rtmp);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 702 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 703 A->factor = FACTOR_LU; 704 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 705 A->assembled = PETSC_TRUE; 706 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 707 if (sctx.nshift){ 708 if (info->shiftnz) { 709 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 710 } else if (info->shiftpd) { 711 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 712 } 713 } 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 719 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 720 { 721 PetscFunctionBegin; 722 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 723 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 724 PetscFunctionReturn(0); 725 } 726 727 728 /* ----------------------------------------------------------- */ 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatLUFactor_SeqAIJ" 731 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 732 { 733 PetscErrorCode ierr; 734 Mat C; 735 736 PetscFunctionBegin; 737 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 738 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 739 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 740 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 /* ----------------------------------------------------------- */ 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatSolve_SeqAIJ" 746 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 747 { 748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 749 IS iscol = a->col,isrow = a->row; 750 PetscErrorCode ierr; 751 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 752 PetscInt nz,*rout,*cout; 753 PetscScalar *x,*tmp,*tmps,sum; 754 const PetscScalar *b; 755 const MatScalar *aa = a->a,*v; 756 757 PetscFunctionBegin; 758 if (!n) PetscFunctionReturn(0); 759 760 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 761 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 762 tmp = a->solve_work; 763 764 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 765 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 766 767 /* forward solve the lower triangular */ 768 tmp[0] = b[*r++]; 769 tmps = tmp; 770 for (i=1; i<n; i++) { 771 v = aa + ai[i] ; 772 vi = aj + ai[i] ; 773 nz = a->diag[i] - ai[i]; 774 sum = b[*r++]; 775 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 776 tmp[i] = sum; 777 } 778 779 /* backward solve the upper triangular */ 780 for (i=n-1; i>=0; i--){ 781 v = aa + a->diag[i] + 1; 782 vi = aj + a->diag[i] + 1; 783 nz = ai[i+1] - a->diag[i] - 1; 784 sum = tmp[i]; 785 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 786 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 787 } 788 789 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 790 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 791 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 792 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 793 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 794 PetscFunctionReturn(0); 795 } 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatMatSolve_SeqAIJ" 799 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 800 { 801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 802 IS iscol = a->col,isrow = a->row; 803 PetscErrorCode ierr; 804 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 805 PetscInt nz,*rout,*cout,neq; 806 PetscScalar *x,*b,*tmp,*tmps,sum; 807 const MatScalar *aa = a->a,*v; 808 809 PetscFunctionBegin; 810 if (!n) PetscFunctionReturn(0); 811 812 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 813 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 814 815 tmp = a->solve_work; 816 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 817 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 818 819 for (neq=0; neq<n; neq++){ 820 /* forward solve the lower triangular */ 821 tmp[0] = b[r[0]]; 822 tmps = tmp; 823 for (i=1; i<n; i++) { 824 v = aa + ai[i] ; 825 vi = aj + ai[i] ; 826 nz = a->diag[i] - ai[i]; 827 sum = b[r[i]]; 828 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 829 tmp[i] = sum; 830 } 831 /* backward solve the upper triangular */ 832 for (i=n-1; i>=0; i--){ 833 v = aa + a->diag[i] + 1; 834 vi = aj + a->diag[i] + 1; 835 nz = ai[i+1] - a->diag[i] - 1; 836 sum = tmp[i]; 837 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 838 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 839 } 840 841 b += n; 842 x += n; 843 } 844 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 845 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 846 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 847 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 848 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 854 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 855 { 856 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 857 IS iscol = a->col,isrow = a->row; 858 PetscErrorCode ierr; 859 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 860 PetscInt nz,*rout,*cout,row; 861 PetscScalar *x,*b,*tmp,*tmps,sum; 862 const MatScalar *aa = a->a,*v; 863 864 PetscFunctionBegin; 865 if (!n) PetscFunctionReturn(0); 866 867 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 868 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 869 tmp = a->solve_work; 870 871 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 872 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 873 874 /* forward solve the lower triangular */ 875 tmp[0] = b[*r++]; 876 tmps = tmp; 877 for (row=1; row<n; row++) { 878 i = rout[row]; /* permuted row */ 879 v = aa + ai[i] ; 880 vi = aj + ai[i] ; 881 nz = a->diag[i] - ai[i]; 882 sum = b[*r++]; 883 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 884 tmp[row] = sum; 885 } 886 887 /* backward solve the upper triangular */ 888 for (row=n-1; row>=0; row--){ 889 i = rout[row]; /* permuted row */ 890 v = aa + a->diag[i] + 1; 891 vi = aj + a->diag[i] + 1; 892 nz = ai[i+1] - a->diag[i] - 1; 893 sum = tmp[row]; 894 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 895 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 896 } 897 898 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 899 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 900 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 901 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 902 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 903 PetscFunctionReturn(0); 904 } 905 906 /* ----------------------------------------------------------- */ 907 #undef __FUNCT__ 908 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 909 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 910 { 911 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 912 PetscErrorCode ierr; 913 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 914 PetscScalar *x,*b; 915 const MatScalar *aa = a->a; 916 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 917 PetscInt adiag_i,i,*vi,nz,ai_i; 918 const MatScalar *v; 919 PetscScalar sum; 920 #endif 921 922 PetscFunctionBegin; 923 if (!n) PetscFunctionReturn(0); 924 925 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 926 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 927 928 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 929 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 930 #else 931 /* forward solve the lower triangular */ 932 x[0] = b[0]; 933 for (i=1; i<n; i++) { 934 ai_i = ai[i]; 935 v = aa + ai_i; 936 vi = aj + ai_i; 937 nz = adiag[i] - ai_i; 938 sum = b[i]; 939 while (nz--) sum -= *v++ * x[*vi++]; 940 x[i] = sum; 941 } 942 943 /* backward solve the upper triangular */ 944 for (i=n-1; i>=0; i--){ 945 adiag_i = adiag[i]; 946 v = aa + adiag_i + 1; 947 vi = aj + adiag_i + 1; 948 nz = ai[i+1] - adiag_i - 1; 949 sum = x[i]; 950 while (nz--) sum -= *v++ * x[*vi++]; 951 x[i] = sum*aa[adiag_i]; 952 } 953 #endif 954 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 955 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 956 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 957 PetscFunctionReturn(0); 958 } 959 960 #undef __FUNCT__ 961 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 962 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 963 { 964 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 965 IS iscol = a->col,isrow = a->row; 966 PetscErrorCode ierr; 967 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 968 PetscInt nz,*rout,*cout; 969 PetscScalar *x,*b,*tmp,sum; 970 const MatScalar *aa = a->a,*v; 971 972 PetscFunctionBegin; 973 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 974 975 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 976 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 977 tmp = a->solve_work; 978 979 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 980 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 981 982 /* forward solve the lower triangular */ 983 tmp[0] = b[*r++]; 984 for (i=1; i<n; i++) { 985 v = aa + ai[i] ; 986 vi = aj + ai[i] ; 987 nz = a->diag[i] - ai[i]; 988 sum = b[*r++]; 989 while (nz--) sum -= *v++ * tmp[*vi++ ]; 990 tmp[i] = sum; 991 } 992 993 /* backward solve the upper triangular */ 994 for (i=n-1; i>=0; i--){ 995 v = aa + a->diag[i] + 1; 996 vi = aj + a->diag[i] + 1; 997 nz = ai[i+1] - a->diag[i] - 1; 998 sum = tmp[i]; 999 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1000 tmp[i] = sum*aa[a->diag[i]]; 1001 x[*c--] += tmp[i]; 1002 } 1003 1004 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1005 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1006 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1008 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1009 1010 PetscFunctionReturn(0); 1011 } 1012 /* -------------------------------------------------------------------*/ 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1015 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1016 { 1017 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1018 IS iscol = a->col,isrow = a->row; 1019 PetscErrorCode ierr; 1020 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1021 PetscInt nz,*rout,*cout,*diag = a->diag; 1022 PetscScalar *x,*b,*tmp,s1; 1023 const MatScalar *aa = a->a,*v; 1024 1025 PetscFunctionBegin; 1026 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1027 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1028 tmp = a->solve_work; 1029 1030 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1031 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1032 1033 /* copy the b into temp work space according to permutation */ 1034 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1035 1036 /* forward solve the U^T */ 1037 for (i=0; i<n; i++) { 1038 v = aa + diag[i] ; 1039 vi = aj + diag[i] + 1; 1040 nz = ai[i+1] - diag[i] - 1; 1041 s1 = tmp[i]; 1042 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1043 while (nz--) { 1044 tmp[*vi++ ] -= (*v++)*s1; 1045 } 1046 tmp[i] = s1; 1047 } 1048 1049 /* backward solve the L^T */ 1050 for (i=n-1; i>=0; i--){ 1051 v = aa + diag[i] - 1 ; 1052 vi = aj + diag[i] - 1 ; 1053 nz = diag[i] - ai[i]; 1054 s1 = tmp[i]; 1055 while (nz--) { 1056 tmp[*vi-- ] -= (*v--)*s1; 1057 } 1058 } 1059 1060 /* copy tmp into x according to permutation */ 1061 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1062 1063 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1064 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1065 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1066 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1067 1068 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1074 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1075 { 1076 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1077 IS iscol = a->col,isrow = a->row; 1078 PetscErrorCode ierr; 1079 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1080 PetscInt nz,*rout,*cout,*diag = a->diag; 1081 PetscScalar *x,*b,*tmp; 1082 const MatScalar *aa = a->a,*v; 1083 1084 PetscFunctionBegin; 1085 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1086 1087 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1088 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1089 tmp = a->solve_work; 1090 1091 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1092 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1093 1094 /* copy the b into temp work space according to permutation */ 1095 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1096 1097 /* forward solve the U^T */ 1098 for (i=0; i<n; i++) { 1099 v = aa + diag[i] ; 1100 vi = aj + diag[i] + 1; 1101 nz = ai[i+1] - diag[i] - 1; 1102 tmp[i] *= *v++; 1103 while (nz--) { 1104 tmp[*vi++ ] -= (*v++)*tmp[i]; 1105 } 1106 } 1107 1108 /* backward solve the L^T */ 1109 for (i=n-1; i>=0; i--){ 1110 v = aa + diag[i] - 1 ; 1111 vi = aj + diag[i] - 1 ; 1112 nz = diag[i] - ai[i]; 1113 while (nz--) { 1114 tmp[*vi-- ] -= (*v--)*tmp[i]; 1115 } 1116 } 1117 1118 /* copy tmp into x according to permutation */ 1119 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1120 1121 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1122 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1123 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1124 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1125 1126 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 /* ----------------------------------------------------------------*/ 1130 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1131 1132 #undef __FUNCT__ 1133 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1134 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1135 { 1136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1137 IS isicol; 1138 PetscErrorCode ierr; 1139 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1140 PetscInt *bi,*cols,nnz,*cols_lvl; 1141 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1142 PetscInt i,levels,diagonal_fill; 1143 PetscTruth col_identity,row_identity; 1144 PetscReal f; 1145 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1146 PetscBT lnkbt; 1147 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1148 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1149 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1150 PetscTruth missing; 1151 1152 PetscFunctionBegin; 1153 if (A->rmap.n != A->cmap.n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap.n,A->cmap.n); 1154 f = info->fill; 1155 levels = (PetscInt)info->levels; 1156 diagonal_fill = (PetscInt)info->diagonal_fill; 1157 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1158 1159 /* special case that simply copies fill pattern */ 1160 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1161 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1162 if (!levels && row_identity && col_identity) { 1163 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1164 (*fact)->factor = FACTOR_LU; 1165 (*fact)->info.factor_mallocs = 0; 1166 (*fact)->info.fill_ratio_given = info->fill; 1167 (*fact)->info.fill_ratio_needed = 1.0; 1168 b = (Mat_SeqAIJ*)(*fact)->data; 1169 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1170 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1171 b->row = isrow; 1172 b->col = iscol; 1173 b->icol = isicol; 1174 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1175 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1176 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1177 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1178 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1183 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1184 1185 /* get new row pointers */ 1186 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1187 bi[0] = 0; 1188 /* bdiag is location of diagonal in factor */ 1189 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1190 bdiag[0] = 0; 1191 1192 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1193 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1194 1195 /* create a linked list for storing column indices of the active row */ 1196 nlnk = n + 1; 1197 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1198 1199 /* initial FreeSpace size is f*(ai[n]+1) */ 1200 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1201 current_space = free_space; 1202 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1203 current_space_lvl = free_space_lvl; 1204 1205 for (i=0; i<n; i++) { 1206 nzi = 0; 1207 /* copy current row into linked list */ 1208 nnz = ai[r[i]+1] - ai[r[i]]; 1209 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1210 cols = aj + ai[r[i]]; 1211 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1212 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1213 nzi += nlnk; 1214 1215 /* make sure diagonal entry is included */ 1216 if (diagonal_fill && lnk[i] == -1) { 1217 fm = n; 1218 while (lnk[fm] < i) fm = lnk[fm]; 1219 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1220 lnk[fm] = i; 1221 lnk_lvl[i] = 0; 1222 nzi++; dcount++; 1223 } 1224 1225 /* add pivot rows into the active row */ 1226 nzbd = 0; 1227 prow = lnk[n]; 1228 while (prow < i) { 1229 nnz = bdiag[prow]; 1230 cols = bj_ptr[prow] + nnz + 1; 1231 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1232 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1233 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1234 nzi += nlnk; 1235 prow = lnk[prow]; 1236 nzbd++; 1237 } 1238 bdiag[i] = nzbd; 1239 bi[i+1] = bi[i] + nzi; 1240 1241 /* if free space is not available, make more free space */ 1242 if (current_space->local_remaining<nzi) { 1243 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1244 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1245 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1246 reallocs++; 1247 } 1248 1249 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1250 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1251 bj_ptr[i] = current_space->array; 1252 bjlvl_ptr[i] = current_space_lvl->array; 1253 1254 /* make sure the active row i has diagonal entry */ 1255 if (*(bj_ptr[i]+bdiag[i]) != i) { 1256 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1257 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1258 } 1259 1260 current_space->array += nzi; 1261 current_space->local_used += nzi; 1262 current_space->local_remaining -= nzi; 1263 current_space_lvl->array += nzi; 1264 current_space_lvl->local_used += nzi; 1265 current_space_lvl->local_remaining -= nzi; 1266 } 1267 1268 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1269 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1270 1271 /* destroy list of free space and other temporary arrays */ 1272 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1273 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1274 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1275 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1276 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1277 1278 #if defined(PETSC_USE_INFO) 1279 { 1280 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1281 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1282 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1283 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1284 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1285 if (diagonal_fill) { 1286 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1287 } 1288 } 1289 #endif 1290 1291 /* put together the new matrix */ 1292 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1293 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1294 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1295 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1296 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1297 b = (Mat_SeqAIJ*)(*fact)->data; 1298 b->free_a = PETSC_TRUE; 1299 b->free_ij = PETSC_TRUE; 1300 b->singlemalloc = PETSC_FALSE; 1301 len = (bi[n] )*sizeof(PetscScalar); 1302 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1303 b->j = bj; 1304 b->i = bi; 1305 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1306 b->diag = bdiag; 1307 b->ilen = 0; 1308 b->imax = 0; 1309 b->row = isrow; 1310 b->col = iscol; 1311 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1312 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1313 b->icol = isicol; 1314 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1315 /* In b structure: Free imax, ilen, old a, old j. 1316 Allocate bdiag, solve_work, new a, new j */ 1317 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1318 b->maxnz = b->nz = bi[n] ; 1319 (*fact)->factor = FACTOR_LU; 1320 (*fact)->info.factor_mallocs = reallocs; 1321 (*fact)->info.fill_ratio_given = f; 1322 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1323 1324 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1325 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1326 1327 PetscFunctionReturn(0); 1328 } 1329 1330 #include "src/mat/impls/sbaij/seq/sbaij.h" 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1333 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1334 { 1335 Mat C = *B; 1336 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1337 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1338 IS ip=b->row,iip = b->icol; 1339 PetscErrorCode ierr; 1340 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1341 PetscInt *ai=a->i,*aj=a->j; 1342 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1343 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1344 PetscReal zeropivot,rs,shiftnz; 1345 PetscReal shiftpd; 1346 ChShift_Ctx sctx; 1347 PetscInt newshift; 1348 1349 PetscFunctionBegin; 1350 1351 shiftnz = info->shiftnz; 1352 shiftpd = info->shiftpd; 1353 zeropivot = info->zeropivot; 1354 1355 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1356 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1357 1358 /* initialization */ 1359 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1360 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1361 jl = il + mbs; 1362 rtmp = (MatScalar*)(jl + mbs); 1363 1364 sctx.shift_amount = 0; 1365 sctx.nshift = 0; 1366 do { 1367 sctx.chshift = PETSC_FALSE; 1368 for (i=0; i<mbs; i++) { 1369 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1370 } 1371 1372 for (k = 0; k<mbs; k++){ 1373 bval = ba + bi[k]; 1374 /* initialize k-th row by the perm[k]-th row of A */ 1375 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1376 for (j = jmin; j < jmax; j++){ 1377 col = riip[aj[j]]; 1378 if (col >= k){ /* only take upper triangular entry */ 1379 rtmp[col] = aa[j]; 1380 *bval++ = 0.0; /* for in-place factorization */ 1381 } 1382 } 1383 /* shift the diagonal of the matrix */ 1384 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1385 1386 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1387 dk = rtmp[k]; 1388 i = jl[k]; /* first row to be added to k_th row */ 1389 1390 while (i < k){ 1391 nexti = jl[i]; /* next row to be added to k_th row */ 1392 1393 /* compute multiplier, update diag(k) and U(i,k) */ 1394 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1395 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1396 dk += uikdi*ba[ili]; 1397 ba[ili] = uikdi; /* -U(i,k) */ 1398 1399 /* add multiple of row i to k-th row */ 1400 jmin = ili + 1; jmax = bi[i+1]; 1401 if (jmin < jmax){ 1402 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1403 /* update il and jl for row i */ 1404 il[i] = jmin; 1405 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1406 } 1407 i = nexti; 1408 } 1409 1410 /* shift the diagonals when zero pivot is detected */ 1411 /* compute rs=sum of abs(off-diagonal) */ 1412 rs = 0.0; 1413 jmin = bi[k]+1; 1414 nz = bi[k+1] - jmin; 1415 bcol = bj + jmin; 1416 while (nz--){ 1417 rs += PetscAbsScalar(rtmp[*bcol]); 1418 bcol++; 1419 } 1420 1421 sctx.rs = rs; 1422 sctx.pv = dk; 1423 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1424 1425 if (newshift == 1) { 1426 if (!sctx.shift_amount) { 1427 sctx.shift_amount = 1e-5; 1428 } 1429 break; 1430 } 1431 1432 /* copy data into U(k,:) */ 1433 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1434 jmin = bi[k]+1; jmax = bi[k+1]; 1435 if (jmin < jmax) { 1436 for (j=jmin; j<jmax; j++){ 1437 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1438 } 1439 /* add the k-th row into il and jl */ 1440 il[k] = jmin; 1441 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1442 } 1443 } 1444 } while (sctx.chshift); 1445 ierr = PetscFree(il);CHKERRQ(ierr); 1446 1447 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1448 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1449 C->factor = FACTOR_CHOLESKY; 1450 C->assembled = PETSC_TRUE; 1451 C->preallocated = PETSC_TRUE; 1452 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1453 if (sctx.nshift){ 1454 if (shiftnz) { 1455 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1456 } else if (shiftpd) { 1457 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1458 } 1459 } 1460 PetscFunctionReturn(0); 1461 } 1462 1463 #undef __FUNCT__ 1464 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1465 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1466 { 1467 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1468 Mat_SeqSBAIJ *b; 1469 Mat B; 1470 PetscErrorCode ierr; 1471 PetscTruth perm_identity,missing; 1472 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1473 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1474 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1475 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1476 PetscReal fill=info->fill,levels=info->levels; 1477 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1478 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1479 PetscBT lnkbt; 1480 IS iperm; 1481 1482 PetscFunctionBegin; 1483 if (A->rmap.n != A->cmap.n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap.n,A->cmap.n); 1484 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1485 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1486 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1487 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1488 1489 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1490 ui[0] = 0; 1491 1492 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1493 if (!levels && perm_identity) { 1494 1495 for (i=0; i<am; i++) { 1496 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1497 } 1498 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1499 cols = uj; 1500 for (i=0; i<am; i++) { 1501 aj = a->j + a->diag[i]; 1502 ncols = ui[i+1] - ui[i]; 1503 for (j=0; j<ncols; j++) *cols++ = *aj++; 1504 } 1505 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1506 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1507 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1508 1509 /* initialization */ 1510 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1511 1512 /* jl: linked list for storing indices of the pivot rows 1513 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1514 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1515 il = jl + am; 1516 uj_ptr = (PetscInt**)(il + am); 1517 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1518 for (i=0; i<am; i++){ 1519 jl[i] = am; il[i] = 0; 1520 } 1521 1522 /* create and initialize a linked list for storing column indices of the active row k */ 1523 nlnk = am + 1; 1524 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1525 1526 /* initial FreeSpace size is fill*(ai[am]+1) */ 1527 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1528 current_space = free_space; 1529 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1530 current_space_lvl = free_space_lvl; 1531 1532 for (k=0; k<am; k++){ /* for each active row k */ 1533 /* initialize lnk by the column indices of row rip[k] of A */ 1534 nzk = 0; 1535 ncols = ai[rip[k]+1] - ai[rip[k]]; 1536 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1537 ncols_upper = 0; 1538 for (j=0; j<ncols; j++){ 1539 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1540 if (riip[i] >= k){ /* only take upper triangular entry */ 1541 ajtmp[ncols_upper] = i; 1542 ncols_upper++; 1543 } 1544 } 1545 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1546 nzk += nlnk; 1547 1548 /* update lnk by computing fill-in for each pivot row to be merged in */ 1549 prow = jl[k]; /* 1st pivot row */ 1550 1551 while (prow < k){ 1552 nextprow = jl[prow]; 1553 1554 /* merge prow into k-th row */ 1555 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1556 jmax = ui[prow+1]; 1557 ncols = jmax-jmin; 1558 i = jmin - ui[prow]; 1559 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1560 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1561 j = *(uj - 1); 1562 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1563 nzk += nlnk; 1564 1565 /* update il and jl for prow */ 1566 if (jmin < jmax){ 1567 il[prow] = jmin; 1568 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1569 } 1570 prow = nextprow; 1571 } 1572 1573 /* if free space is not available, make more free space */ 1574 if (current_space->local_remaining<nzk) { 1575 i = am - k + 1; /* num of unfactored rows */ 1576 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1577 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1578 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1579 reallocs++; 1580 } 1581 1582 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1583 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1584 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1585 1586 /* add the k-th row into il and jl */ 1587 if (nzk > 1){ 1588 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1589 jl[k] = jl[i]; jl[i] = k; 1590 il[k] = ui[k] + 1; 1591 } 1592 uj_ptr[k] = current_space->array; 1593 uj_lvl_ptr[k] = current_space_lvl->array; 1594 1595 current_space->array += nzk; 1596 current_space->local_used += nzk; 1597 current_space->local_remaining -= nzk; 1598 1599 current_space_lvl->array += nzk; 1600 current_space_lvl->local_used += nzk; 1601 current_space_lvl->local_remaining -= nzk; 1602 1603 ui[k+1] = ui[k] + nzk; 1604 } 1605 1606 #if defined(PETSC_USE_INFO) 1607 if (ai[am] != 0) { 1608 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1609 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1610 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1611 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1612 } else { 1613 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1614 } 1615 #endif 1616 1617 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1618 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1619 ierr = PetscFree(jl);CHKERRQ(ierr); 1620 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1621 1622 /* destroy list of free space and other temporary array(s) */ 1623 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1624 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1625 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1626 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1627 1628 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1629 1630 /* put together the new matrix in MATSEQSBAIJ format */ 1631 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1632 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1633 B = *fact; 1634 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1635 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1636 1637 b = (Mat_SeqSBAIJ*)B->data; 1638 b->singlemalloc = PETSC_FALSE; 1639 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1640 b->j = uj; 1641 b->i = ui; 1642 b->diag = 0; 1643 b->ilen = 0; 1644 b->imax = 0; 1645 b->row = perm; 1646 b->col = perm; 1647 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1648 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1649 b->icol = iperm; 1650 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1651 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1652 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1653 b->maxnz = b->nz = ui[am]; 1654 b->free_a = PETSC_TRUE; 1655 b->free_ij = PETSC_TRUE; 1656 1657 B->factor = FACTOR_CHOLESKY; 1658 B->info.factor_mallocs = reallocs; 1659 B->info.fill_ratio_given = fill; 1660 if (ai[am] != 0) { 1661 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1662 } else { 1663 B->info.fill_ratio_needed = 0.0; 1664 } 1665 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1666 if (perm_identity){ 1667 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1668 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1669 } 1670 PetscFunctionReturn(0); 1671 } 1672 1673 #undef __FUNCT__ 1674 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1675 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1676 { 1677 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1678 Mat_SeqSBAIJ *b; 1679 Mat B; 1680 PetscErrorCode ierr; 1681 PetscTruth perm_identity; 1682 PetscReal fill = info->fill; 1683 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1684 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1685 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1686 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1687 PetscBT lnkbt; 1688 IS iperm; 1689 1690 PetscFunctionBegin; 1691 if (A->rmap.n != A->cmap.n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap.n,A->cmap.n); 1692 /* check whether perm is the identity mapping */ 1693 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1694 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1695 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1696 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1697 1698 /* initialization */ 1699 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1700 ui[0] = 0; 1701 1702 /* jl: linked list for storing indices of the pivot rows 1703 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1704 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1705 il = jl + am; 1706 cols = il + am; 1707 ui_ptr = (PetscInt**)(cols + am); 1708 for (i=0; i<am; i++){ 1709 jl[i] = am; il[i] = 0; 1710 } 1711 1712 /* create and initialize a linked list for storing column indices of the active row k */ 1713 nlnk = am + 1; 1714 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1715 1716 /* initial FreeSpace size is fill*(ai[am]+1) */ 1717 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1718 current_space = free_space; 1719 1720 for (k=0; k<am; k++){ /* for each active row k */ 1721 /* initialize lnk by the column indices of row rip[k] of A */ 1722 nzk = 0; 1723 ncols = ai[rip[k]+1] - ai[rip[k]]; 1724 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1725 ncols_upper = 0; 1726 for (j=0; j<ncols; j++){ 1727 i = riip[*(aj + ai[rip[k]] + j)]; 1728 if (i >= k){ /* only take upper triangular entry */ 1729 cols[ncols_upper] = i; 1730 ncols_upper++; 1731 } 1732 } 1733 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1734 nzk += nlnk; 1735 1736 /* update lnk by computing fill-in for each pivot row to be merged in */ 1737 prow = jl[k]; /* 1st pivot row */ 1738 1739 while (prow < k){ 1740 nextprow = jl[prow]; 1741 /* merge prow into k-th row */ 1742 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1743 jmax = ui[prow+1]; 1744 ncols = jmax-jmin; 1745 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1746 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1747 nzk += nlnk; 1748 1749 /* update il and jl for prow */ 1750 if (jmin < jmax){ 1751 il[prow] = jmin; 1752 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1753 } 1754 prow = nextprow; 1755 } 1756 1757 /* if free space is not available, make more free space */ 1758 if (current_space->local_remaining<nzk) { 1759 i = am - k + 1; /* num of unfactored rows */ 1760 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1761 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1762 reallocs++; 1763 } 1764 1765 /* copy data into free space, then initialize lnk */ 1766 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1767 1768 /* add the k-th row into il and jl */ 1769 if (nzk-1 > 0){ 1770 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1771 jl[k] = jl[i]; jl[i] = k; 1772 il[k] = ui[k] + 1; 1773 } 1774 ui_ptr[k] = current_space->array; 1775 current_space->array += nzk; 1776 current_space->local_used += nzk; 1777 current_space->local_remaining -= nzk; 1778 1779 ui[k+1] = ui[k] + nzk; 1780 } 1781 1782 #if defined(PETSC_USE_INFO) 1783 if (ai[am] != 0) { 1784 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1785 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1786 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1787 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1788 } else { 1789 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1790 } 1791 #endif 1792 1793 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1794 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1795 ierr = PetscFree(jl);CHKERRQ(ierr); 1796 1797 /* destroy list of free space and other temporary array(s) */ 1798 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1799 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1800 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1801 1802 /* put together the new matrix in MATSEQSBAIJ format */ 1803 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1804 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1805 B = *fact; 1806 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1807 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1808 1809 b = (Mat_SeqSBAIJ*)B->data; 1810 b->singlemalloc = PETSC_FALSE; 1811 b->free_a = PETSC_TRUE; 1812 b->free_ij = PETSC_TRUE; 1813 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1814 b->j = uj; 1815 b->i = ui; 1816 b->diag = 0; 1817 b->ilen = 0; 1818 b->imax = 0; 1819 b->row = perm; 1820 b->col = perm; 1821 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1822 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1823 b->icol = iperm; 1824 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1825 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1826 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1827 b->maxnz = b->nz = ui[am]; 1828 1829 B->factor = FACTOR_CHOLESKY; 1830 B->info.factor_mallocs = reallocs; 1831 B->info.fill_ratio_given = fill; 1832 if (ai[am] != 0) { 1833 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1834 } else { 1835 B->info.fill_ratio_needed = 0.0; 1836 } 1837 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1838 if (perm_identity){ 1839 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1840 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1841 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1842 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1843 } else { 1844 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1845 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1846 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1847 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851