1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 252 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 301 current_space = free_space; 302 303 for (i=0; i<n; i++) { 304 /* copy previous fill into linked list */ 305 nzi = 0; 306 nnz = ai[r[i]+1] - ai[r[i]]; 307 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 308 ajtmp = aj + ai[r[i]]; 309 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 310 nzi += nlnk; 311 312 /* add pivot rows into linked list */ 313 row = lnk[n]; 314 while (row < i) { 315 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 316 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 317 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 318 nzi += nlnk; 319 row = lnk[row]; 320 } 321 bi[i+1] = bi[i] + nzi; 322 im[i] = nzi; 323 324 /* mark bdiag */ 325 nzbd = 0; 326 nnz = nzi; 327 k = lnk[n]; 328 while (nnz-- && k < i){ 329 nzbd++; 330 k = lnk[k]; 331 } 332 bdiag[i] = bi[i] + nzbd; 333 334 /* if free space is not available, make more free space */ 335 if (current_space->local_remaining<nzi) { 336 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 337 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 338 reallocs++; 339 } 340 341 /* copy data into free space, then initialize lnk */ 342 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 343 bi_ptr[i] = current_space->array; 344 current_space->array += nzi; 345 current_space->local_used += nzi; 346 current_space->local_remaining -= nzi; 347 } 348 #if defined(PETSC_USE_INFO) 349 if (ai[n] != 0) { 350 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 351 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 352 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 354 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 355 } else { 356 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 357 } 358 #endif 359 360 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 361 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 362 363 /* destroy list of free space and other temporary array(s) */ 364 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 365 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 366 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 367 ierr = PetscFree(im);CHKERRQ(ierr); 368 369 /* put together the new matrix */ 370 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 371 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 372 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 373 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 374 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 375 b = (Mat_SeqAIJ*)(*B)->data; 376 b->freedata = PETSC_TRUE; 377 b->singlemalloc = PETSC_FALSE; 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 379 b->j = bj; 380 b->i = bi; 381 b->diag = bdiag; 382 b->ilen = 0; 383 b->imax = 0; 384 b->row = isrow; 385 b->col = iscol; 386 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 387 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 388 b->icol = isicol; 389 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 390 391 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 392 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 393 b->maxnz = b->nz = bi[n] ; 394 395 (*B)->factor = FACTOR_LU; 396 (*B)->info.factor_mallocs = reallocs; 397 (*B)->info.fill_ratio_given = f; 398 399 if (ai[n] != 0) { 400 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 401 } else { 402 (*B)->info.fill_ratio_needed = 0.0; 403 } 404 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 405 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 406 PetscFunctionReturn(0); 407 } 408 409 /* 410 Trouble in factorization, should we dump the original matrix? 411 */ 412 #undef __FUNCT__ 413 #define __FUNCT__ "MatFactorDumpMatrix" 414 PetscErrorCode MatFactorDumpMatrix(Mat A) 415 { 416 PetscErrorCode ierr; 417 PetscTruth flg; 418 419 PetscFunctionBegin; 420 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 421 if (flg) { 422 PetscViewer viewer; 423 char filename[PETSC_MAX_PATH_LEN]; 424 425 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 426 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 427 ierr = MatView(A,viewer);CHKERRQ(ierr); 428 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 429 } 430 PetscFunctionReturn(0); 431 } 432 433 /* ----------------------------------------------------------- */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 436 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 437 { 438 Mat C=*B; 439 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 440 IS isrow = b->row,isicol = b->icol; 441 PetscErrorCode ierr; 442 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 443 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 444 PetscInt *diag_offset = b->diag,diag,*pj; 445 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 446 PetscScalar d; 447 PetscReal rs; 448 LUShift_Ctx sctx; 449 PetscInt newshift,*ddiag; 450 451 PetscFunctionBegin; 452 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 453 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 454 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 455 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 456 rtmps = rtmp; ics = ic; 457 458 sctx.shift_top = 0; 459 sctx.nshift_max = 0; 460 sctx.shift_lo = 0; 461 sctx.shift_hi = 0; 462 463 if (!a->diag) { 464 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 465 } 466 /* if both shift schemes are chosen by user, only use info->shiftpd */ 467 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 468 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 469 PetscInt *aai = a->i; 470 ddiag = a->diag; 471 sctx.shift_top = 0; 472 for (i=0; i<n; i++) { 473 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 474 d = (a->a)[ddiag[i]]; 475 rs = -PetscAbsScalar(d) - PetscRealPart(d); 476 v = a->a+aai[i]; 477 nz = aai[i+1] - aai[i]; 478 for (j=0; j<nz; j++) 479 rs += PetscAbsScalar(v[j]); 480 if (rs>sctx.shift_top) sctx.shift_top = rs; 481 } 482 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 483 sctx.shift_top *= 1.1; 484 sctx.nshift_max = 5; 485 sctx.shift_lo = 0.; 486 sctx.shift_hi = 1.; 487 } 488 489 sctx.shift_amount = 0; 490 sctx.nshift = 0; 491 do { 492 sctx.lushift = PETSC_FALSE; 493 for (i=0; i<n; i++){ 494 nz = bi[i+1] - bi[i]; 495 bjtmp = bj + bi[i]; 496 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 497 498 /* load in initial (unfactored row) */ 499 nz = a->i[r[i]+1] - a->i[r[i]]; 500 ajtmp = a->j + a->i[r[i]]; 501 v = a->a + a->i[r[i]]; 502 for (j=0; j<nz; j++) { 503 rtmp[ics[ajtmp[j]]] = v[j]; 504 } 505 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 506 507 row = *bjtmp++; 508 while (row < i) { 509 pc = rtmp + row; 510 if (*pc != 0.0) { 511 pv = b->a + diag_offset[row]; 512 pj = b->j + diag_offset[row] + 1; 513 multiplier = *pc / *pv++; 514 *pc = multiplier; 515 nz = bi[row+1] - diag_offset[row] - 1; 516 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 517 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 518 } 519 row = *bjtmp++; 520 } 521 /* finished row so stick it into b->a */ 522 pv = b->a + bi[i] ; 523 pj = b->j + bi[i] ; 524 nz = bi[i+1] - bi[i]; 525 diag = diag_offset[i] - bi[i]; 526 rs = 0.0; 527 for (j=0; j<nz; j++) { 528 pv[j] = rtmps[pj[j]]; 529 if (j != diag) rs += PetscAbsScalar(pv[j]); 530 } 531 532 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 533 sctx.rs = rs; 534 sctx.pv = pv[diag]; 535 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 536 if (newshift == 1) break; 537 } 538 539 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 540 /* 541 * if no shift in this attempt & shifting & started shifting & can refine, 542 * then try lower shift 543 */ 544 sctx.shift_hi = info->shift_fraction; 545 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 546 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 547 sctx.lushift = PETSC_TRUE; 548 sctx.nshift++; 549 } 550 } while (sctx.lushift); 551 552 /* invert diagonal entries for simplier triangular solves */ 553 for (i=0; i<n; i++) { 554 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 555 } 556 557 ierr = PetscFree(rtmp);CHKERRQ(ierr); 558 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 559 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 560 C->factor = FACTOR_LU; 561 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 562 C->assembled = PETSC_TRUE; 563 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 564 if (sctx.nshift){ 565 if (info->shiftnz) { 566 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 567 } else if (info->shiftpd) { 568 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 569 } 570 } 571 PetscFunctionReturn(0); 572 } 573 574 #undef __FUNCT__ 575 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 576 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 577 { 578 PetscFunctionBegin; 579 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 580 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 581 PetscFunctionReturn(0); 582 } 583 584 585 /* ----------------------------------------------------------- */ 586 #undef __FUNCT__ 587 #define __FUNCT__ "MatLUFactor_SeqAIJ" 588 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 589 { 590 PetscErrorCode ierr; 591 Mat C; 592 593 PetscFunctionBegin; 594 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 595 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 596 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 597 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 598 PetscFunctionReturn(0); 599 } 600 /* ----------------------------------------------------------- */ 601 #undef __FUNCT__ 602 #define __FUNCT__ "MatSolve_SeqAIJ" 603 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 604 { 605 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 606 IS iscol = a->col,isrow = a->row; 607 PetscErrorCode ierr; 608 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 609 PetscInt nz,*rout,*cout; 610 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 611 612 PetscFunctionBegin; 613 if (!n) PetscFunctionReturn(0); 614 615 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 616 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 617 tmp = a->solve_work; 618 619 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 620 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 621 622 /* forward solve the lower triangular */ 623 tmp[0] = b[*r++]; 624 tmps = tmp; 625 for (i=1; i<n; i++) { 626 v = aa + ai[i] ; 627 vi = aj + ai[i] ; 628 nz = a->diag[i] - ai[i]; 629 sum = b[*r++]; 630 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 631 tmp[i] = sum; 632 } 633 634 /* backward solve the upper triangular */ 635 for (i=n-1; i>=0; i--){ 636 v = aa + a->diag[i] + 1; 637 vi = aj + a->diag[i] + 1; 638 nz = ai[i+1] - a->diag[i] - 1; 639 sum = tmp[i]; 640 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 641 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 642 } 643 644 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 645 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 646 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 647 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 648 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 652 #undef __FUNCT__ 653 #define __FUNCT__ "MatMatSolve_SeqAIJ" 654 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 655 { 656 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 657 IS iscol = a->col,isrow = a->row; 658 PetscErrorCode ierr; 659 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 660 PetscInt nz,*rout,*cout,neq; 661 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 662 663 PetscFunctionBegin; 664 if (!n) PetscFunctionReturn(0); 665 666 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 667 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 668 669 tmp = a->solve_work; 670 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 671 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 672 673 for (neq=0; neq<n; neq++){ 674 /* forward solve the lower triangular */ 675 tmp[0] = b[r[0]]; 676 tmps = tmp; 677 for (i=1; i<n; i++) { 678 v = aa + ai[i] ; 679 vi = aj + ai[i] ; 680 nz = a->diag[i] - ai[i]; 681 sum = b[r[i]]; 682 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 683 tmp[i] = sum; 684 } 685 /* backward solve the upper triangular */ 686 for (i=n-1; i>=0; i--){ 687 v = aa + a->diag[i] + 1; 688 vi = aj + a->diag[i] + 1; 689 nz = ai[i+1] - a->diag[i] - 1; 690 sum = tmp[i]; 691 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 692 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 693 } 694 695 b += n; 696 x += n; 697 } 698 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 700 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 701 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 702 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 /* ----------------------------------------------------------- */ 707 #undef __FUNCT__ 708 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 709 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 710 { 711 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 712 PetscErrorCode ierr; 713 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 714 PetscScalar *x,*b,*aa = a->a; 715 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 716 PetscInt adiag_i,i,*vi,nz,ai_i; 717 PetscScalar *v,sum; 718 #endif 719 720 PetscFunctionBegin; 721 if (!n) PetscFunctionReturn(0); 722 723 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 724 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 725 726 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 727 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 728 #else 729 /* forward solve the lower triangular */ 730 x[0] = b[0]; 731 for (i=1; i<n; i++) { 732 ai_i = ai[i]; 733 v = aa + ai_i; 734 vi = aj + ai_i; 735 nz = adiag[i] - ai_i; 736 sum = b[i]; 737 while (nz--) sum -= *v++ * x[*vi++]; 738 x[i] = sum; 739 } 740 741 /* backward solve the upper triangular */ 742 for (i=n-1; i>=0; i--){ 743 adiag_i = adiag[i]; 744 v = aa + adiag_i + 1; 745 vi = aj + adiag_i + 1; 746 nz = ai[i+1] - adiag_i - 1; 747 sum = x[i]; 748 while (nz--) sum -= *v++ * x[*vi++]; 749 x[i] = sum*aa[adiag_i]; 750 } 751 #endif 752 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 753 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 754 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 755 PetscFunctionReturn(0); 756 } 757 758 #undef __FUNCT__ 759 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 760 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 761 { 762 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 763 IS iscol = a->col,isrow = a->row; 764 PetscErrorCode ierr; 765 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 766 PetscInt nz,*rout,*cout; 767 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 768 769 PetscFunctionBegin; 770 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 771 772 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 773 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 774 tmp = a->solve_work; 775 776 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 777 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 778 779 /* forward solve the lower triangular */ 780 tmp[0] = b[*r++]; 781 for (i=1; i<n; i++) { 782 v = aa + ai[i] ; 783 vi = aj + ai[i] ; 784 nz = a->diag[i] - ai[i]; 785 sum = b[*r++]; 786 while (nz--) sum -= *v++ * tmp[*vi++ ]; 787 tmp[i] = sum; 788 } 789 790 /* backward solve the upper triangular */ 791 for (i=n-1; i>=0; i--){ 792 v = aa + a->diag[i] + 1; 793 vi = aj + a->diag[i] + 1; 794 nz = ai[i+1] - a->diag[i] - 1; 795 sum = tmp[i]; 796 while (nz--) sum -= *v++ * tmp[*vi++ ]; 797 tmp[i] = sum*aa[a->diag[i]]; 798 x[*c--] += tmp[i]; 799 } 800 801 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 802 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 803 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 804 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 805 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 806 807 PetscFunctionReturn(0); 808 } 809 /* -------------------------------------------------------------------*/ 810 #undef __FUNCT__ 811 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 812 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 813 { 814 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 815 IS iscol = a->col,isrow = a->row; 816 PetscErrorCode ierr; 817 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 818 PetscInt nz,*rout,*cout,*diag = a->diag; 819 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 820 821 PetscFunctionBegin; 822 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 823 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 824 tmp = a->solve_work; 825 826 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 827 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 828 829 /* copy the b into temp work space according to permutation */ 830 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 831 832 /* forward solve the U^T */ 833 for (i=0; i<n; i++) { 834 v = aa + diag[i] ; 835 vi = aj + diag[i] + 1; 836 nz = ai[i+1] - diag[i] - 1; 837 s1 = tmp[i]; 838 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 839 while (nz--) { 840 tmp[*vi++ ] -= (*v++)*s1; 841 } 842 tmp[i] = s1; 843 } 844 845 /* backward solve the L^T */ 846 for (i=n-1; i>=0; i--){ 847 v = aa + diag[i] - 1 ; 848 vi = aj + diag[i] - 1 ; 849 nz = diag[i] - ai[i]; 850 s1 = tmp[i]; 851 while (nz--) { 852 tmp[*vi-- ] -= (*v--)*s1; 853 } 854 } 855 856 /* copy tmp into x according to permutation */ 857 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 858 859 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 860 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 861 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 863 864 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 865 PetscFunctionReturn(0); 866 } 867 868 #undef __FUNCT__ 869 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 870 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 871 { 872 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 873 IS iscol = a->col,isrow = a->row; 874 PetscErrorCode ierr; 875 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 876 PetscInt nz,*rout,*cout,*diag = a->diag; 877 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 878 879 PetscFunctionBegin; 880 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 881 882 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 883 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 884 tmp = a->solve_work; 885 886 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 887 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 888 889 /* copy the b into temp work space according to permutation */ 890 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 891 892 /* forward solve the U^T */ 893 for (i=0; i<n; i++) { 894 v = aa + diag[i] ; 895 vi = aj + diag[i] + 1; 896 nz = ai[i+1] - diag[i] - 1; 897 tmp[i] *= *v++; 898 while (nz--) { 899 tmp[*vi++ ] -= (*v++)*tmp[i]; 900 } 901 } 902 903 /* backward solve the L^T */ 904 for (i=n-1; i>=0; i--){ 905 v = aa + diag[i] - 1 ; 906 vi = aj + diag[i] - 1 ; 907 nz = diag[i] - ai[i]; 908 while (nz--) { 909 tmp[*vi-- ] -= (*v--)*tmp[i]; 910 } 911 } 912 913 /* copy tmp into x according to permutation */ 914 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 915 916 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 917 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 918 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 920 921 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 922 PetscFunctionReturn(0); 923 } 924 /* ----------------------------------------------------------------*/ 925 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 926 927 #undef __FUNCT__ 928 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 929 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 930 { 931 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 932 IS isicol; 933 PetscErrorCode ierr; 934 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 935 PetscInt *bi,*cols,nnz,*cols_lvl; 936 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 937 PetscInt i,levels,diagonal_fill; 938 PetscTruth col_identity,row_identity; 939 PetscReal f; 940 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 941 PetscBT lnkbt; 942 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 943 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 944 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 945 946 PetscFunctionBegin; 947 f = info->fill; 948 levels = (PetscInt)info->levels; 949 diagonal_fill = (PetscInt)info->diagonal_fill; 950 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 951 952 /* special case that simply copies fill pattern */ 953 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 954 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 955 if (!levels && row_identity && col_identity) { 956 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 957 (*fact)->factor = FACTOR_LU; 958 b = (Mat_SeqAIJ*)(*fact)->data; 959 if (!b->diag) { 960 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 961 } 962 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 963 b->row = isrow; 964 b->col = iscol; 965 b->icol = isicol; 966 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 967 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 968 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 969 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 970 PetscFunctionReturn(0); 971 } 972 973 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 974 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 975 976 /* get new row pointers */ 977 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 978 bi[0] = 0; 979 /* bdiag is location of diagonal in factor */ 980 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 981 bdiag[0] = 0; 982 983 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 984 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 985 986 /* create a linked list for storing column indices of the active row */ 987 nlnk = n + 1; 988 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 989 990 /* initial FreeSpace size is f*(ai[n]+1) */ 991 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 992 current_space = free_space; 993 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 994 current_space_lvl = free_space_lvl; 995 996 for (i=0; i<n; i++) { 997 nzi = 0; 998 /* copy current row into linked list */ 999 nnz = ai[r[i]+1] - ai[r[i]]; 1000 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1001 cols = aj + ai[r[i]]; 1002 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1003 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1004 nzi += nlnk; 1005 1006 /* make sure diagonal entry is included */ 1007 if (diagonal_fill && lnk[i] == -1) { 1008 fm = n; 1009 while (lnk[fm] < i) fm = lnk[fm]; 1010 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1011 lnk[fm] = i; 1012 lnk_lvl[i] = 0; 1013 nzi++; dcount++; 1014 } 1015 1016 /* add pivot rows into the active row */ 1017 nzbd = 0; 1018 prow = lnk[n]; 1019 while (prow < i) { 1020 nnz = bdiag[prow]; 1021 cols = bj_ptr[prow] + nnz + 1; 1022 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1023 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1024 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1025 nzi += nlnk; 1026 prow = lnk[prow]; 1027 nzbd++; 1028 } 1029 bdiag[i] = nzbd; 1030 bi[i+1] = bi[i] + nzi; 1031 1032 /* if free space is not available, make more free space */ 1033 if (current_space->local_remaining<nzi) { 1034 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1035 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1036 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1037 reallocs++; 1038 } 1039 1040 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1041 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1042 bj_ptr[i] = current_space->array; 1043 bjlvl_ptr[i] = current_space_lvl->array; 1044 1045 /* make sure the active row i has diagonal entry */ 1046 if (*(bj_ptr[i]+bdiag[i]) != i) { 1047 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1048 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1049 } 1050 1051 current_space->array += nzi; 1052 current_space->local_used += nzi; 1053 current_space->local_remaining -= nzi; 1054 current_space_lvl->array += nzi; 1055 current_space_lvl->local_used += nzi; 1056 current_space_lvl->local_remaining -= nzi; 1057 } 1058 1059 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1060 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1061 1062 /* destroy list of free space and other temporary arrays */ 1063 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1064 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1065 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1066 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1067 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1068 1069 #if defined(PETSC_USE_INFO) 1070 { 1071 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1072 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1073 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1074 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1075 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1076 if (diagonal_fill) { 1077 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1078 } 1079 } 1080 #endif 1081 1082 /* put together the new matrix */ 1083 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1084 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1085 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1086 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1087 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1088 b = (Mat_SeqAIJ*)(*fact)->data; 1089 b->freedata = PETSC_TRUE; 1090 b->singlemalloc = PETSC_FALSE; 1091 len = (bi[n] )*sizeof(PetscScalar); 1092 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1093 b->j = bj; 1094 b->i = bi; 1095 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1096 b->diag = bdiag; 1097 b->ilen = 0; 1098 b->imax = 0; 1099 b->row = isrow; 1100 b->col = iscol; 1101 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1102 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1103 b->icol = isicol; 1104 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1105 /* In b structure: Free imax, ilen, old a, old j. 1106 Allocate bdiag, solve_work, new a, new j */ 1107 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1108 b->maxnz = b->nz = bi[n] ; 1109 (*fact)->factor = FACTOR_LU; 1110 (*fact)->info.factor_mallocs = reallocs; 1111 (*fact)->info.fill_ratio_given = f; 1112 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1113 1114 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1115 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1116 1117 PetscFunctionReturn(0); 1118 } 1119 1120 #include "src/mat/impls/sbaij/seq/sbaij.h" 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1123 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1124 { 1125 Mat C = *B; 1126 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1127 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1128 IS ip=b->row; 1129 PetscErrorCode ierr; 1130 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1131 PetscInt *ai=a->i,*aj=a->j; 1132 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1133 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1134 PetscReal zeropivot,rs,shiftnz; 1135 PetscReal shiftpd; 1136 ChShift_Ctx sctx; 1137 PetscInt newshift; 1138 1139 PetscFunctionBegin; 1140 shiftnz = info->shiftnz; 1141 shiftpd = info->shiftpd; 1142 zeropivot = info->zeropivot; 1143 1144 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1145 1146 /* initialization */ 1147 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1148 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1149 jl = il + mbs; 1150 rtmp = (MatScalar*)(jl + mbs); 1151 1152 sctx.shift_amount = 0; 1153 sctx.nshift = 0; 1154 do { 1155 sctx.chshift = PETSC_FALSE; 1156 for (i=0; i<mbs; i++) { 1157 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1158 } 1159 1160 for (k = 0; k<mbs; k++){ 1161 bval = ba + bi[k]; 1162 /* initialize k-th row by the perm[k]-th row of A */ 1163 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1164 for (j = jmin; j < jmax; j++){ 1165 col = rip[aj[j]]; 1166 if (col >= k){ /* only take upper triangular entry */ 1167 rtmp[col] = aa[j]; 1168 *bval++ = 0.0; /* for in-place factorization */ 1169 } 1170 } 1171 /* shift the diagonal of the matrix */ 1172 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1173 1174 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1175 dk = rtmp[k]; 1176 i = jl[k]; /* first row to be added to k_th row */ 1177 1178 while (i < k){ 1179 nexti = jl[i]; /* next row to be added to k_th row */ 1180 1181 /* compute multiplier, update diag(k) and U(i,k) */ 1182 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1183 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1184 dk += uikdi*ba[ili]; 1185 ba[ili] = uikdi; /* -U(i,k) */ 1186 1187 /* add multiple of row i to k-th row */ 1188 jmin = ili + 1; jmax = bi[i+1]; 1189 if (jmin < jmax){ 1190 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1191 /* update il and jl for row i */ 1192 il[i] = jmin; 1193 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1194 } 1195 i = nexti; 1196 } 1197 1198 /* shift the diagonals when zero pivot is detected */ 1199 /* compute rs=sum of abs(off-diagonal) */ 1200 rs = 0.0; 1201 jmin = bi[k]+1; 1202 nz = bi[k+1] - jmin; 1203 if (nz){ 1204 bcol = bj + jmin; 1205 while (nz--){ 1206 rs += PetscAbsScalar(rtmp[*bcol]); 1207 bcol++; 1208 } 1209 } 1210 1211 sctx.rs = rs; 1212 sctx.pv = dk; 1213 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1214 if (newshift == 1) break; 1215 1216 /* copy data into U(k,:) */ 1217 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1218 jmin = bi[k]+1; jmax = bi[k+1]; 1219 if (jmin < jmax) { 1220 for (j=jmin; j<jmax; j++){ 1221 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1222 } 1223 /* add the k-th row into il and jl */ 1224 il[k] = jmin; 1225 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1226 } 1227 } 1228 } while (sctx.chshift); 1229 ierr = PetscFree(il);CHKERRQ(ierr); 1230 1231 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1232 C->factor = FACTOR_CHOLESKY; 1233 C->assembled = PETSC_TRUE; 1234 C->preallocated = PETSC_TRUE; 1235 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1236 if (sctx.nshift){ 1237 if (shiftnz) { 1238 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1239 } else if (shiftpd) { 1240 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1241 } 1242 } 1243 PetscFunctionReturn(0); 1244 } 1245 1246 #undef __FUNCT__ 1247 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1248 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1249 { 1250 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1251 Mat_SeqSBAIJ *b; 1252 Mat B; 1253 PetscErrorCode ierr; 1254 PetscTruth perm_identity; 1255 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1256 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1257 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1258 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1259 PetscReal fill=info->fill,levels=info->levels; 1260 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1261 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1262 PetscBT lnkbt; 1263 1264 PetscFunctionBegin; 1265 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1266 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1267 1268 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1269 ui[0] = 0; 1270 1271 /* special case that simply copies fill pattern */ 1272 if (!levels && perm_identity) { 1273 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1274 for (i=0; i<am; i++) { 1275 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1276 } 1277 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1278 cols = uj; 1279 for (i=0; i<am; i++) { 1280 aj = a->j + a->diag[i]; 1281 ncols = ui[i+1] - ui[i]; 1282 for (j=0; j<ncols; j++) *cols++ = *aj++; 1283 } 1284 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1285 /* initialization */ 1286 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1287 1288 /* jl: linked list for storing indices of the pivot rows 1289 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1290 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1291 il = jl + am; 1292 uj_ptr = (PetscInt**)(il + am); 1293 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1294 for (i=0; i<am; i++){ 1295 jl[i] = am; il[i] = 0; 1296 } 1297 1298 /* create and initialize a linked list for storing column indices of the active row k */ 1299 nlnk = am + 1; 1300 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1301 1302 /* initial FreeSpace size is fill*(ai[am]+1) */ 1303 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1304 current_space = free_space; 1305 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1306 current_space_lvl = free_space_lvl; 1307 1308 for (k=0; k<am; k++){ /* for each active row k */ 1309 /* initialize lnk by the column indices of row rip[k] of A */ 1310 nzk = 0; 1311 ncols = ai[rip[k]+1] - ai[rip[k]]; 1312 ncols_upper = 0; 1313 for (j=0; j<ncols; j++){ 1314 i = *(aj + ai[rip[k]] + j); 1315 if (rip[i] >= k){ /* only take upper triangular entry */ 1316 ajtmp[ncols_upper] = i; 1317 ncols_upper++; 1318 } 1319 } 1320 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1321 nzk += nlnk; 1322 1323 /* update lnk by computing fill-in for each pivot row to be merged in */ 1324 prow = jl[k]; /* 1st pivot row */ 1325 1326 while (prow < k){ 1327 nextprow = jl[prow]; 1328 1329 /* merge prow into k-th row */ 1330 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1331 jmax = ui[prow+1]; 1332 ncols = jmax-jmin; 1333 i = jmin - ui[prow]; 1334 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1335 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1336 j = *(uj - 1); 1337 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1338 nzk += nlnk; 1339 1340 /* update il and jl for prow */ 1341 if (jmin < jmax){ 1342 il[prow] = jmin; 1343 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1344 } 1345 prow = nextprow; 1346 } 1347 1348 /* if free space is not available, make more free space */ 1349 if (current_space->local_remaining<nzk) { 1350 i = am - k + 1; /* num of unfactored rows */ 1351 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1352 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1353 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1354 reallocs++; 1355 } 1356 1357 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1358 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1359 1360 /* add the k-th row into il and jl */ 1361 if (nzk > 1){ 1362 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1363 jl[k] = jl[i]; jl[i] = k; 1364 il[k] = ui[k] + 1; 1365 } 1366 uj_ptr[k] = current_space->array; 1367 uj_lvl_ptr[k] = current_space_lvl->array; 1368 1369 current_space->array += nzk; 1370 current_space->local_used += nzk; 1371 current_space->local_remaining -= nzk; 1372 1373 current_space_lvl->array += nzk; 1374 current_space_lvl->local_used += nzk; 1375 current_space_lvl->local_remaining -= nzk; 1376 1377 ui[k+1] = ui[k] + nzk; 1378 } 1379 1380 #if defined(PETSC_USE_INFO) 1381 if (ai[am] != 0) { 1382 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1383 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1384 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1385 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1386 } else { 1387 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1388 } 1389 #endif 1390 1391 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1392 ierr = PetscFree(jl);CHKERRQ(ierr); 1393 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1394 1395 /* destroy list of free space and other temporary array(s) */ 1396 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1397 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1398 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1399 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1400 1401 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1402 1403 /* put together the new matrix in MATSEQSBAIJ format */ 1404 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1405 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1406 B = *fact; 1407 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1408 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1409 1410 b = (Mat_SeqSBAIJ*)B->data; 1411 b->singlemalloc = PETSC_FALSE; 1412 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1413 b->j = uj; 1414 b->i = ui; 1415 b->diag = 0; 1416 b->ilen = 0; 1417 b->imax = 0; 1418 b->row = perm; 1419 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1420 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1421 b->icol = perm; 1422 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1423 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1424 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1425 b->maxnz = b->nz = ui[am]; 1426 b->freedata = PETSC_TRUE; 1427 1428 B->factor = FACTOR_CHOLESKY; 1429 B->info.factor_mallocs = reallocs; 1430 B->info.fill_ratio_given = fill; 1431 if (ai[am] != 0) { 1432 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1433 } else { 1434 B->info.fill_ratio_needed = 0.0; 1435 } 1436 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1437 if (perm_identity){ 1438 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1439 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1440 } 1441 PetscFunctionReturn(0); 1442 } 1443 1444 #undef __FUNCT__ 1445 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1446 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1447 { 1448 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1449 Mat_SeqSBAIJ *b; 1450 Mat B; 1451 PetscErrorCode ierr; 1452 PetscTruth perm_identity; 1453 PetscReal fill = info->fill; 1454 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1455 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1456 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1457 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1458 PetscBT lnkbt; 1459 IS iperm; 1460 1461 PetscFunctionBegin; 1462 /* check whether perm is the identity mapping */ 1463 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1464 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1465 1466 if (!perm_identity){ 1467 /* check if perm is symmetric! */ 1468 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1469 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1470 for (i=0; i<am; i++) { 1471 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1472 } 1473 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1474 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1475 } 1476 1477 /* initialization */ 1478 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1479 ui[0] = 0; 1480 1481 /* jl: linked list for storing indices of the pivot rows 1482 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1483 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1484 il = jl + am; 1485 cols = il + am; 1486 ui_ptr = (PetscInt**)(cols + am); 1487 for (i=0; i<am; i++){ 1488 jl[i] = am; il[i] = 0; 1489 } 1490 1491 /* create and initialize a linked list for storing column indices of the active row k */ 1492 nlnk = am + 1; 1493 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1494 1495 /* initial FreeSpace size is fill*(ai[am]+1) */ 1496 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1497 current_space = free_space; 1498 1499 for (k=0; k<am; k++){ /* for each active row k */ 1500 /* initialize lnk by the column indices of row rip[k] of A */ 1501 nzk = 0; 1502 ncols = ai[rip[k]+1] - ai[rip[k]]; 1503 ncols_upper = 0; 1504 for (j=0; j<ncols; j++){ 1505 i = rip[*(aj + ai[rip[k]] + j)]; 1506 if (i >= k){ /* only take upper triangular entry */ 1507 cols[ncols_upper] = i; 1508 ncols_upper++; 1509 } 1510 } 1511 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1512 nzk += nlnk; 1513 1514 /* update lnk by computing fill-in for each pivot row to be merged in */ 1515 prow = jl[k]; /* 1st pivot row */ 1516 1517 while (prow < k){ 1518 nextprow = jl[prow]; 1519 /* merge prow into k-th row */ 1520 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1521 jmax = ui[prow+1]; 1522 ncols = jmax-jmin; 1523 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1524 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1525 nzk += nlnk; 1526 1527 /* update il and jl for prow */ 1528 if (jmin < jmax){ 1529 il[prow] = jmin; 1530 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1531 } 1532 prow = nextprow; 1533 } 1534 1535 /* if free space is not available, make more free space */ 1536 if (current_space->local_remaining<nzk) { 1537 i = am - k + 1; /* num of unfactored rows */ 1538 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1539 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1540 reallocs++; 1541 } 1542 1543 /* copy data into free space, then initialize lnk */ 1544 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1545 1546 /* add the k-th row into il and jl */ 1547 if (nzk-1 > 0){ 1548 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1549 jl[k] = jl[i]; jl[i] = k; 1550 il[k] = ui[k] + 1; 1551 } 1552 ui_ptr[k] = current_space->array; 1553 current_space->array += nzk; 1554 current_space->local_used += nzk; 1555 current_space->local_remaining -= nzk; 1556 1557 ui[k+1] = ui[k] + nzk; 1558 } 1559 1560 #if defined(PETSC_USE_INFO) 1561 if (ai[am] != 0) { 1562 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1563 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1564 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1565 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1566 } else { 1567 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1568 } 1569 #endif 1570 1571 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1572 ierr = PetscFree(jl);CHKERRQ(ierr); 1573 1574 /* destroy list of free space and other temporary array(s) */ 1575 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1576 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1577 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1578 1579 /* put together the new matrix in MATSEQSBAIJ format */ 1580 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1581 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1582 B = *fact; 1583 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1584 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1585 1586 b = (Mat_SeqSBAIJ*)B->data; 1587 b->singlemalloc = PETSC_FALSE; 1588 b->freedata = PETSC_TRUE; 1589 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1590 b->j = uj; 1591 b->i = ui; 1592 b->diag = 0; 1593 b->ilen = 0; 1594 b->imax = 0; 1595 b->row = perm; 1596 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1597 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1598 b->icol = perm; 1599 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1600 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1601 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1602 b->maxnz = b->nz = ui[am]; 1603 1604 B->factor = FACTOR_CHOLESKY; 1605 B->info.factor_mallocs = reallocs; 1606 B->info.fill_ratio_given = fill; 1607 if (ai[am] != 0) { 1608 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1609 } else { 1610 B->info.fill_ratio_needed = 0.0; 1611 } 1612 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1613 if (perm_identity){ 1614 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1615 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1616 } 1617 PetscFunctionReturn(0); 1618 } 1619