1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat); 23 24 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 25 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 26 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 27 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 28 #endif 29 30 #undef __FUNCT__ 31 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 32 /* ------------------------------------------------------------ 33 34 This interface was contribed by Tony Caola 35 36 This routine is an interface to the pivoting drop-tolerance 37 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 38 SPARSEKIT2. 39 40 The SPARSEKIT2 routines used here are covered by the GNU 41 copyright; see the file gnu in this directory. 42 43 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 44 help in getting this routine ironed out. 45 46 The major drawback to this routine is that if info->fill is 47 not large enough it fails rather than allocating more space; 48 this can be fixed by hacking/improving the f2c version of 49 Yousef Saad's code. 50 51 ------------------------------------------------------------ 52 */ 53 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 54 { 55 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 56 PetscFunctionBegin; 57 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 58 You can obtain the drop tolerance routines by installing PETSc from\n\ 59 www.mcs.anl.gov/petsc\n"); 60 #else 61 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 62 IS iscolf,isicol,isirow; 63 PetscTruth reorder; 64 PetscErrorCode ierr,sierr; 65 PetscInt *c,*r,*ic,i,n = A->rmap.n; 66 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 67 PetscInt *ordcol,*iwk,*iperm,*jw; 68 PetscInt jmax,lfill,job,*o_i,*o_j; 69 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 70 PetscReal af; 71 72 PetscFunctionBegin; 73 74 if (info->dt == PETSC_DEFAULT) info->dt = .005; 75 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 76 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 77 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 78 lfill = (PetscInt)(info->dtcount/2.0); 79 jmax = (PetscInt)(info->fill*a->nz); 80 81 82 /* ------------------------------------------------------------ 83 If reorder=.TRUE., then the original matrix has to be 84 reordered to reflect the user selected ordering scheme, and 85 then de-reordered so it is in it's original format. 86 Because Saad's dperm() is NOT in place, we have to copy 87 the original matrix and allocate more storage. . . 88 ------------------------------------------------------------ 89 */ 90 91 /* set reorder to true if either isrow or iscol is not identity */ 92 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 93 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 94 reorder = PetscNot(reorder); 95 96 97 /* storage for ilu factor */ 98 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 100 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 101 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 102 103 /* ------------------------------------------------------------ 104 Make sure that everything is Fortran formatted (1-Based) 105 ------------------------------------------------------------ 106 */ 107 for (i=old_i[0];i<old_i[n];i++) { 108 old_j[i]++; 109 } 110 for(i=0;i<n+1;i++) { 111 old_i[i]++; 112 }; 113 114 115 if (reorder) { 116 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 117 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 r[i] = r[i]+1; 120 c[i] = c[i]+1; 121 } 122 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 123 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 124 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 125 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 126 for (i=0;i<n;i++) { 127 r[i] = r[i]-1; 128 c[i] = c[i]-1; 129 } 130 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 131 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 132 o_a = old_a2; 133 o_j = old_j2; 134 o_i = old_i2; 135 } else { 136 o_a = old_a; 137 o_j = old_j; 138 o_i = old_i; 139 } 140 141 /* ------------------------------------------------------------ 142 Call Saad's ilutp() routine to generate the factorization 143 ------------------------------------------------------------ 144 */ 145 146 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 147 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 148 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 149 150 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 151 if (sierr) { 152 switch (sierr) { 153 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 154 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 155 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 156 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 157 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 158 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 159 } 160 } 161 162 ierr = PetscFree(w);CHKERRQ(ierr); 163 ierr = PetscFree(jw);CHKERRQ(ierr); 164 165 /* ------------------------------------------------------------ 166 Saad's routine gives the result in Modified Sparse Row (msr) 167 Convert to Compressed Sparse Row format (csr) 168 ------------------------------------------------------------ 169 */ 170 171 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 173 174 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 175 176 ierr = PetscFree(iwk);CHKERRQ(ierr); 177 ierr = PetscFree(wk);CHKERRQ(ierr); 178 179 if (reorder) { 180 ierr = PetscFree(old_a2);CHKERRQ(ierr); 181 ierr = PetscFree(old_j2);CHKERRQ(ierr); 182 ierr = PetscFree(old_i2);CHKERRQ(ierr); 183 } else { 184 /* fix permutation of old_j that the factorization introduced */ 185 for (i=old_i[0]; i<old_i[n]; i++) { 186 old_j[i-1] = iperm[old_j[i-1]-1]; 187 } 188 } 189 190 /* get rid of the shift to indices starting at 1 */ 191 for (i=0; i<n+1; i++) { 192 old_i[i]--; 193 } 194 for (i=old_i[0];i<old_i[n];i++) { 195 old_j[i]--; 196 } 197 198 /* Make the factored matrix 0-based */ 199 for (i=0; i<n+1; i++) { 200 new_i[i]--; 201 } 202 for (i=new_i[0];i<new_i[n];i++) { 203 new_j[i]--; 204 } 205 206 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 207 /*-- permute the right-hand-side and solution vectors --*/ 208 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 209 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 210 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 211 for(i=0; i<n; i++) { 212 ordcol[i] = ic[iperm[i]-1]; 213 }; 214 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 215 ierr = ISDestroy(isicol);CHKERRQ(ierr); 216 217 ierr = PetscFree(iperm);CHKERRQ(ierr); 218 219 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 220 ierr = PetscFree(ordcol);CHKERRQ(ierr); 221 222 /*----- put together the new matrix -----*/ 223 224 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 225 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 226 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 227 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 228 (*fact)->factor = FACTOR_LU; 229 (*fact)->assembled = PETSC_TRUE; 230 231 b = (Mat_SeqAIJ*)(*fact)->data; 232 b->freedata = PETSC_TRUE; 233 b->sorted = PETSC_FALSE; 234 b->singlemalloc = PETSC_FALSE; 235 b->a = new_a; 236 b->j = new_j; 237 b->i = new_i; 238 b->ilen = 0; 239 b->imax = 0; 240 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 241 b->row = isirow; 242 b->col = iscolf; 243 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 244 b->maxnz = b->nz = new_i[n]; 245 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 246 (*fact)->info.factor_mallocs = 0; 247 248 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 249 250 af = ((double)b->nz)/((double)a->nz) + .001; 251 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 252 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 253 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 255 256 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 257 258 PetscFunctionReturn(0); 259 #endif 260 } 261 262 #undef __FUNCT__ 263 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 264 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 265 { 266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 267 IS isicol; 268 PetscErrorCode ierr; 269 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 270 PetscInt *bi,*bj,*ajtmp; 271 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 272 PetscReal f; 273 PetscInt nlnk,*lnk,k,**bi_ptr; 274 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 275 PetscBT lnkbt; 276 277 PetscFunctionBegin; 278 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 279 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 280 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 281 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 282 283 /* get new row pointers */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 285 bi[0] = 0; 286 287 /* bdiag is location of diagonal in factor */ 288 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 289 bdiag[0] = 0; 290 291 /* linked list for storing column indices of the active row */ 292 nlnk = n + 1; 293 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 294 295 ierr = PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);CHKERRQ(ierr); 296 bi_ptr = (PetscInt**)(im + n); 297 298 /* initial FreeSpace size is f*(ai[n]+1) */ 299 f = info->fill; 300 if (f < 1.0) f = 1.0; /* In case user input a wrong fill, reset it to 1.0 */ 301 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 302 current_space = free_space; 303 304 for (i=0; i<n; i++) { 305 /* copy previous fill into linked list */ 306 nzi = 0; 307 nnz = ai[r[i]+1] - ai[r[i]]; 308 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 309 ajtmp = aj + ai[r[i]]; 310 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 311 nzi += nlnk; 312 313 /* add pivot rows into linked list */ 314 row = lnk[n]; 315 while (row < i) { 316 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 317 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 318 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 319 nzi += nlnk; 320 row = lnk[row]; 321 } 322 bi[i+1] = bi[i] + nzi; 323 im[i] = nzi; 324 325 /* mark bdiag */ 326 nzbd = 0; 327 nnz = nzi; 328 k = lnk[n]; 329 while (nnz-- && k < i){ 330 nzbd++; 331 k = lnk[k]; 332 } 333 bdiag[i] = bi[i] + nzbd; 334 335 /* if free space is not available, make more free space */ 336 if (current_space->local_remaining<nzi) { 337 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 338 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 339 reallocs++; 340 } 341 342 /* copy data into free space, then initialize lnk */ 343 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 344 bi_ptr[i] = current_space->array; 345 current_space->array += nzi; 346 current_space->local_used += nzi; 347 current_space->local_remaining -= nzi; 348 } 349 #if defined(PETSC_USE_INFO) 350 if (ai[n] != 0) { 351 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 352 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 353 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 354 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 355 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 356 } else { 357 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 358 } 359 #endif 360 361 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 362 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 363 364 /* destroy list of free space and other temporary array(s) */ 365 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 366 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 367 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 368 ierr = PetscFree(im);CHKERRQ(ierr); 369 370 /* put together the new matrix */ 371 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 372 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 373 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 374 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 375 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 376 b = (Mat_SeqAIJ*)(*B)->data; 377 b->freedata = PETSC_TRUE; 378 b->singlemalloc = PETSC_FALSE; 379 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 380 b->j = bj; 381 b->i = bi; 382 b->diag = bdiag; 383 b->ilen = 0; 384 b->imax = 0; 385 b->row = isrow; 386 b->col = iscol; 387 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 388 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 389 b->icol = isicol; 390 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 391 392 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 393 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 394 b->maxnz = b->nz = bi[n] ; 395 396 (*B)->factor = FACTOR_LU; 397 (*B)->info.factor_mallocs = reallocs; 398 (*B)->info.fill_ratio_given = f; 399 400 if (ai[n] != 0) { 401 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 402 } else { 403 (*B)->info.fill_ratio_needed = 0.0; 404 } 405 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 406 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 407 PetscFunctionReturn(0); 408 } 409 410 /* 411 Trouble in factorization, should we dump the original matrix? 412 */ 413 #undef __FUNCT__ 414 #define __FUNCT__ "MatFactorDumpMatrix" 415 PetscErrorCode MatFactorDumpMatrix(Mat A) 416 { 417 PetscErrorCode ierr; 418 PetscTruth flg; 419 420 PetscFunctionBegin; 421 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 422 if (flg) { 423 PetscViewer viewer; 424 char filename[PETSC_MAX_PATH_LEN]; 425 426 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 427 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 428 ierr = MatView(A,viewer);CHKERRQ(ierr); 429 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 430 } 431 PetscFunctionReturn(0); 432 } 433 434 /* ----------------------------------------------------------- */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 437 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 438 { 439 Mat C=*B; 440 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 441 IS isrow = b->row,isicol = b->icol; 442 PetscErrorCode ierr; 443 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 444 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 445 PetscInt *diag_offset = b->diag,diag,*pj; 446 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 447 PetscScalar d; 448 PetscReal rs; 449 LUShift_Ctx sctx; 450 PetscInt newshift,*ddiag; 451 452 PetscFunctionBegin; 453 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 454 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 455 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 456 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 457 rtmps = rtmp; ics = ic; 458 459 sctx.shift_top = 0; 460 sctx.nshift_max = 0; 461 sctx.shift_lo = 0; 462 sctx.shift_hi = 0; 463 464 if (!a->diag) { 465 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 466 } 467 /* if both shift schemes are chosen by user, only use info->shiftpd */ 468 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 469 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 470 PetscInt *aai = a->i; 471 ddiag = a->diag; 472 sctx.shift_top = 0; 473 for (i=0; i<n; i++) { 474 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 475 d = (a->a)[ddiag[i]]; 476 rs = -PetscAbsScalar(d) - PetscRealPart(d); 477 v = a->a+aai[i]; 478 nz = aai[i+1] - aai[i]; 479 for (j=0; j<nz; j++) 480 rs += PetscAbsScalar(v[j]); 481 if (rs>sctx.shift_top) sctx.shift_top = rs; 482 } 483 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 484 sctx.shift_top *= 1.1; 485 sctx.nshift_max = 5; 486 sctx.shift_lo = 0.; 487 sctx.shift_hi = 1.; 488 } 489 490 sctx.shift_amount = 0; 491 sctx.nshift = 0; 492 do { 493 sctx.lushift = PETSC_FALSE; 494 for (i=0; i<n; i++){ 495 nz = bi[i+1] - bi[i]; 496 bjtmp = bj + bi[i]; 497 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 498 499 /* load in initial (unfactored row) */ 500 nz = a->i[r[i]+1] - a->i[r[i]]; 501 ajtmp = a->j + a->i[r[i]]; 502 v = a->a + a->i[r[i]]; 503 for (j=0; j<nz; j++) { 504 rtmp[ics[ajtmp[j]]] = v[j]; 505 } 506 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 507 508 row = *bjtmp++; 509 while (row < i) { 510 pc = rtmp + row; 511 if (*pc != 0.0) { 512 pv = b->a + diag_offset[row]; 513 pj = b->j + diag_offset[row] + 1; 514 multiplier = *pc / *pv++; 515 *pc = multiplier; 516 nz = bi[row+1] - diag_offset[row] - 1; 517 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 518 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 519 } 520 row = *bjtmp++; 521 } 522 /* finished row so stick it into b->a */ 523 pv = b->a + bi[i] ; 524 pj = b->j + bi[i] ; 525 nz = bi[i+1] - bi[i]; 526 diag = diag_offset[i] - bi[i]; 527 rs = 0.0; 528 for (j=0; j<nz; j++) { 529 pv[j] = rtmps[pj[j]]; 530 if (j != diag) rs += PetscAbsScalar(pv[j]); 531 } 532 533 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 534 sctx.rs = rs; 535 sctx.pv = pv[diag]; 536 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 537 if (newshift == 1) break; 538 } 539 540 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 541 /* 542 * if no shift in this attempt & shifting & started shifting & can refine, 543 * then try lower shift 544 */ 545 sctx.shift_hi = info->shift_fraction; 546 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 547 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 548 sctx.lushift = PETSC_TRUE; 549 sctx.nshift++; 550 } 551 } while (sctx.lushift); 552 553 /* invert diagonal entries for simplier triangular solves */ 554 for (i=0; i<n; i++) { 555 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 556 } 557 558 ierr = PetscFree(rtmp);CHKERRQ(ierr); 559 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 560 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 561 C->factor = FACTOR_LU; 562 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 563 C->assembled = PETSC_TRUE; 564 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 565 if (sctx.nshift){ 566 if (info->shiftnz) { 567 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 568 } else if (info->shiftpd) { 569 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 570 } 571 } 572 PetscFunctionReturn(0); 573 } 574 575 #undef __FUNCT__ 576 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 577 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 578 { 579 PetscFunctionBegin; 580 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 581 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 582 PetscFunctionReturn(0); 583 } 584 585 586 /* ----------------------------------------------------------- */ 587 #undef __FUNCT__ 588 #define __FUNCT__ "MatLUFactor_SeqAIJ" 589 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 590 { 591 PetscErrorCode ierr; 592 Mat C; 593 594 PetscFunctionBegin; 595 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 596 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 597 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 598 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 /* ----------------------------------------------------------- */ 602 #undef __FUNCT__ 603 #define __FUNCT__ "MatSolve_SeqAIJ" 604 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 605 { 606 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 607 IS iscol = a->col,isrow = a->row; 608 PetscErrorCode ierr; 609 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 610 PetscInt nz,*rout,*cout; 611 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 612 613 PetscFunctionBegin; 614 if (!n) PetscFunctionReturn(0); 615 616 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 617 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 618 tmp = a->solve_work; 619 620 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 621 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 622 623 /* forward solve the lower triangular */ 624 tmp[0] = b[*r++]; 625 tmps = tmp; 626 for (i=1; i<n; i++) { 627 v = aa + ai[i] ; 628 vi = aj + ai[i] ; 629 nz = a->diag[i] - ai[i]; 630 sum = b[*r++]; 631 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 632 tmp[i] = sum; 633 } 634 635 /* backward solve the upper triangular */ 636 for (i=n-1; i>=0; i--){ 637 v = aa + a->diag[i] + 1; 638 vi = aj + a->diag[i] + 1; 639 nz = ai[i+1] - a->diag[i] - 1; 640 sum = tmp[i]; 641 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 642 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 643 } 644 645 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 646 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 647 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 648 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 649 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 650 PetscFunctionReturn(0); 651 } 652 653 #undef __FUNCT__ 654 #define __FUNCT__ "MatMatSolve_SeqAIJ" 655 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 656 { 657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 658 IS iscol = a->col,isrow = a->row; 659 PetscErrorCode ierr; 660 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 661 PetscInt nz,*rout,*cout,neq; 662 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 663 664 PetscFunctionBegin; 665 if (!n) PetscFunctionReturn(0); 666 667 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 668 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 669 670 tmp = a->solve_work; 671 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 672 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 673 674 for (neq=0; neq<n; neq++){ 675 /* forward solve the lower triangular */ 676 tmp[0] = b[r[0]]; 677 tmps = tmp; 678 for (i=1; i<n; i++) { 679 v = aa + ai[i] ; 680 vi = aj + ai[i] ; 681 nz = a->diag[i] - ai[i]; 682 sum = b[r[i]]; 683 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 684 tmp[i] = sum; 685 } 686 /* backward solve the upper triangular */ 687 for (i=n-1; i>=0; i--){ 688 v = aa + a->diag[i] + 1; 689 vi = aj + a->diag[i] + 1; 690 nz = ai[i+1] - a->diag[i] - 1; 691 sum = tmp[i]; 692 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 693 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 694 } 695 696 b += n; 697 x += n; 698 } 699 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 701 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 702 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 703 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 704 PetscFunctionReturn(0); 705 } 706 707 /* ----------------------------------------------------------- */ 708 #undef __FUNCT__ 709 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 710 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 711 { 712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 713 PetscErrorCode ierr; 714 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 715 PetscScalar *x,*b,*aa = a->a; 716 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 717 PetscInt adiag_i,i,*vi,nz,ai_i; 718 PetscScalar *v,sum; 719 #endif 720 721 PetscFunctionBegin; 722 if (!n) PetscFunctionReturn(0); 723 724 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 725 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 726 727 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 728 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 729 #else 730 /* forward solve the lower triangular */ 731 x[0] = b[0]; 732 for (i=1; i<n; i++) { 733 ai_i = ai[i]; 734 v = aa + ai_i; 735 vi = aj + ai_i; 736 nz = adiag[i] - ai_i; 737 sum = b[i]; 738 while (nz--) sum -= *v++ * x[*vi++]; 739 x[i] = sum; 740 } 741 742 /* backward solve the upper triangular */ 743 for (i=n-1; i>=0; i--){ 744 adiag_i = adiag[i]; 745 v = aa + adiag_i + 1; 746 vi = aj + adiag_i + 1; 747 nz = ai[i+1] - adiag_i - 1; 748 sum = x[i]; 749 while (nz--) sum -= *v++ * x[*vi++]; 750 x[i] = sum*aa[adiag_i]; 751 } 752 #endif 753 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 754 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 755 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 756 PetscFunctionReturn(0); 757 } 758 759 #undef __FUNCT__ 760 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 761 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 762 { 763 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 764 IS iscol = a->col,isrow = a->row; 765 PetscErrorCode ierr; 766 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 767 PetscInt nz,*rout,*cout; 768 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 769 770 PetscFunctionBegin; 771 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 772 773 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 774 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 775 tmp = a->solve_work; 776 777 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 778 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 779 780 /* forward solve the lower triangular */ 781 tmp[0] = b[*r++]; 782 for (i=1; i<n; i++) { 783 v = aa + ai[i] ; 784 vi = aj + ai[i] ; 785 nz = a->diag[i] - ai[i]; 786 sum = b[*r++]; 787 while (nz--) sum -= *v++ * tmp[*vi++ ]; 788 tmp[i] = sum; 789 } 790 791 /* backward solve the upper triangular */ 792 for (i=n-1; i>=0; i--){ 793 v = aa + a->diag[i] + 1; 794 vi = aj + a->diag[i] + 1; 795 nz = ai[i+1] - a->diag[i] - 1; 796 sum = tmp[i]; 797 while (nz--) sum -= *v++ * tmp[*vi++ ]; 798 tmp[i] = sum*aa[a->diag[i]]; 799 x[*c--] += tmp[i]; 800 } 801 802 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 803 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 804 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 805 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 806 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 807 808 PetscFunctionReturn(0); 809 } 810 /* -------------------------------------------------------------------*/ 811 #undef __FUNCT__ 812 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 813 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 814 { 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 816 IS iscol = a->col,isrow = a->row; 817 PetscErrorCode ierr; 818 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 819 PetscInt nz,*rout,*cout,*diag = a->diag; 820 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 821 822 PetscFunctionBegin; 823 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 824 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 825 tmp = a->solve_work; 826 827 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 828 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 829 830 /* copy the b into temp work space according to permutation */ 831 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 832 833 /* forward solve the U^T */ 834 for (i=0; i<n; i++) { 835 v = aa + diag[i] ; 836 vi = aj + diag[i] + 1; 837 nz = ai[i+1] - diag[i] - 1; 838 s1 = tmp[i]; 839 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 840 while (nz--) { 841 tmp[*vi++ ] -= (*v++)*s1; 842 } 843 tmp[i] = s1; 844 } 845 846 /* backward solve the L^T */ 847 for (i=n-1; i>=0; i--){ 848 v = aa + diag[i] - 1 ; 849 vi = aj + diag[i] - 1 ; 850 nz = diag[i] - ai[i]; 851 s1 = tmp[i]; 852 while (nz--) { 853 tmp[*vi-- ] -= (*v--)*s1; 854 } 855 } 856 857 /* copy tmp into x according to permutation */ 858 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 859 860 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 861 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 862 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 864 865 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 866 PetscFunctionReturn(0); 867 } 868 869 #undef __FUNCT__ 870 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 871 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 872 { 873 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 874 IS iscol = a->col,isrow = a->row; 875 PetscErrorCode ierr; 876 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 877 PetscInt nz,*rout,*cout,*diag = a->diag; 878 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 879 880 PetscFunctionBegin; 881 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 882 883 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 884 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 885 tmp = a->solve_work; 886 887 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 888 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 889 890 /* copy the b into temp work space according to permutation */ 891 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 892 893 /* forward solve the U^T */ 894 for (i=0; i<n; i++) { 895 v = aa + diag[i] ; 896 vi = aj + diag[i] + 1; 897 nz = ai[i+1] - diag[i] - 1; 898 tmp[i] *= *v++; 899 while (nz--) { 900 tmp[*vi++ ] -= (*v++)*tmp[i]; 901 } 902 } 903 904 /* backward solve the L^T */ 905 for (i=n-1; i>=0; i--){ 906 v = aa + diag[i] - 1 ; 907 vi = aj + diag[i] - 1 ; 908 nz = diag[i] - ai[i]; 909 while (nz--) { 910 tmp[*vi-- ] -= (*v--)*tmp[i]; 911 } 912 } 913 914 /* copy tmp into x according to permutation */ 915 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 916 917 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 918 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 919 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 920 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 921 922 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 923 PetscFunctionReturn(0); 924 } 925 /* ----------------------------------------------------------------*/ 926 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat); 927 928 #undef __FUNCT__ 929 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 930 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 931 { 932 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 933 IS isicol; 934 PetscErrorCode ierr; 935 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j; 936 PetscInt *bi,*cols,nnz,*cols_lvl; 937 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 938 PetscInt i,levels,diagonal_fill; 939 PetscTruth col_identity,row_identity; 940 PetscReal f; 941 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 942 PetscBT lnkbt; 943 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 944 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 945 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 946 947 PetscFunctionBegin; 948 f = info->fill; 949 if (f < 1.0) {f = info->fill = 1.0;} 950 levels = (PetscInt)info->levels; 951 diagonal_fill = (PetscInt)info->diagonal_fill; 952 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 953 954 /* special case that simply copies fill pattern */ 955 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 956 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 957 if (!levels && row_identity && col_identity) { 958 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 959 (*fact)->factor = FACTOR_LU; 960 (*fact)->info.factor_mallocs = 0; 961 (*fact)->info.fill_ratio_given = info->fill; 962 (*fact)->info.fill_ratio_needed = 1.0; 963 b = (Mat_SeqAIJ*)(*fact)->data; 964 if (!b->diag) { 965 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 966 } 967 ierr = MatMissingDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 968 b->row = isrow; 969 b->col = iscol; 970 b->icol = isicol; 971 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 972 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 973 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 974 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 975 PetscFunctionReturn(0); 976 } 977 978 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 979 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 980 981 /* get new row pointers */ 982 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 983 bi[0] = 0; 984 /* bdiag is location of diagonal in factor */ 985 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 986 bdiag[0] = 0; 987 988 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 989 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 990 991 /* create a linked list for storing column indices of the active row */ 992 nlnk = n + 1; 993 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 994 995 /* initial FreeSpace size is f*(ai[n]+1) */ 996 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 997 current_space = free_space; 998 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 999 current_space_lvl = free_space_lvl; 1000 1001 for (i=0; i<n; i++) { 1002 nzi = 0; 1003 /* copy current row into linked list */ 1004 nnz = ai[r[i]+1] - ai[r[i]]; 1005 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1006 cols = aj + ai[r[i]]; 1007 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1008 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1009 nzi += nlnk; 1010 1011 /* make sure diagonal entry is included */ 1012 if (diagonal_fill && lnk[i] == -1) { 1013 fm = n; 1014 while (lnk[fm] < i) fm = lnk[fm]; 1015 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1016 lnk[fm] = i; 1017 lnk_lvl[i] = 0; 1018 nzi++; dcount++; 1019 } 1020 1021 /* add pivot rows into the active row */ 1022 nzbd = 0; 1023 prow = lnk[n]; 1024 while (prow < i) { 1025 nnz = bdiag[prow]; 1026 cols = bj_ptr[prow] + nnz + 1; 1027 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1028 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1029 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1030 nzi += nlnk; 1031 prow = lnk[prow]; 1032 nzbd++; 1033 } 1034 bdiag[i] = nzbd; 1035 bi[i+1] = bi[i] + nzi; 1036 1037 /* if free space is not available, make more free space */ 1038 if (current_space->local_remaining<nzi) { 1039 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1040 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1041 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1042 reallocs++; 1043 } 1044 1045 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1046 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1047 bj_ptr[i] = current_space->array; 1048 bjlvl_ptr[i] = current_space_lvl->array; 1049 1050 /* make sure the active row i has diagonal entry */ 1051 if (*(bj_ptr[i]+bdiag[i]) != i) { 1052 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1053 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1054 } 1055 1056 current_space->array += nzi; 1057 current_space->local_used += nzi; 1058 current_space->local_remaining -= nzi; 1059 current_space_lvl->array += nzi; 1060 current_space_lvl->local_used += nzi; 1061 current_space_lvl->local_remaining -= nzi; 1062 } 1063 1064 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1065 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1066 1067 /* destroy list of free space and other temporary arrays */ 1068 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1069 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1070 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1071 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1072 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1073 1074 #if defined(PETSC_USE_INFO) 1075 { 1076 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1077 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1078 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1079 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1080 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1081 if (diagonal_fill) { 1082 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1083 } 1084 } 1085 #endif 1086 1087 /* put together the new matrix */ 1088 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1089 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1090 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1091 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1092 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1093 b = (Mat_SeqAIJ*)(*fact)->data; 1094 b->freedata = PETSC_TRUE; 1095 b->singlemalloc = PETSC_FALSE; 1096 len = (bi[n] )*sizeof(PetscScalar); 1097 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1098 b->j = bj; 1099 b->i = bi; 1100 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1101 b->diag = bdiag; 1102 b->ilen = 0; 1103 b->imax = 0; 1104 b->row = isrow; 1105 b->col = iscol; 1106 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1107 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1108 b->icol = isicol; 1109 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1110 /* In b structure: Free imax, ilen, old a, old j. 1111 Allocate bdiag, solve_work, new a, new j */ 1112 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1113 b->maxnz = b->nz = bi[n] ; 1114 (*fact)->factor = FACTOR_LU; 1115 (*fact)->info.factor_mallocs = reallocs; 1116 (*fact)->info.fill_ratio_given = f; 1117 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1118 1119 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1120 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1121 1122 PetscFunctionReturn(0); 1123 } 1124 1125 #include "src/mat/impls/sbaij/seq/sbaij.h" 1126 #undef __FUNCT__ 1127 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1128 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1129 { 1130 Mat C = *B; 1131 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1132 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1133 IS ip=b->row; 1134 PetscErrorCode ierr; 1135 PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1136 PetscInt *ai=a->i,*aj=a->j; 1137 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1138 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1139 PetscReal zeropivot,rs,shiftnz; 1140 PetscReal shiftpd; 1141 ChShift_Ctx sctx; 1142 PetscInt newshift; 1143 1144 PetscFunctionBegin; 1145 shiftnz = info->shiftnz; 1146 shiftpd = info->shiftpd; 1147 zeropivot = info->zeropivot; 1148 1149 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1150 1151 /* initialization */ 1152 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1153 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1154 jl = il + mbs; 1155 rtmp = (MatScalar*)(jl + mbs); 1156 1157 sctx.shift_amount = 0; 1158 sctx.nshift = 0; 1159 do { 1160 sctx.chshift = PETSC_FALSE; 1161 for (i=0; i<mbs; i++) { 1162 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1163 } 1164 1165 for (k = 0; k<mbs; k++){ 1166 bval = ba + bi[k]; 1167 /* initialize k-th row by the perm[k]-th row of A */ 1168 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1169 for (j = jmin; j < jmax; j++){ 1170 col = rip[aj[j]]; 1171 if (col >= k){ /* only take upper triangular entry */ 1172 rtmp[col] = aa[j]; 1173 *bval++ = 0.0; /* for in-place factorization */ 1174 } 1175 } 1176 /* shift the diagonal of the matrix */ 1177 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1178 1179 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1180 dk = rtmp[k]; 1181 i = jl[k]; /* first row to be added to k_th row */ 1182 1183 while (i < k){ 1184 nexti = jl[i]; /* next row to be added to k_th row */ 1185 1186 /* compute multiplier, update diag(k) and U(i,k) */ 1187 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1188 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1189 dk += uikdi*ba[ili]; 1190 ba[ili] = uikdi; /* -U(i,k) */ 1191 1192 /* add multiple of row i to k-th row */ 1193 jmin = ili + 1; jmax = bi[i+1]; 1194 if (jmin < jmax){ 1195 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1196 /* update il and jl for row i */ 1197 il[i] = jmin; 1198 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1199 } 1200 i = nexti; 1201 } 1202 1203 /* shift the diagonals when zero pivot is detected */ 1204 /* compute rs=sum of abs(off-diagonal) */ 1205 rs = 0.0; 1206 jmin = bi[k]+1; 1207 nz = bi[k+1] - jmin; 1208 if (nz){ 1209 bcol = bj + jmin; 1210 while (nz--){ 1211 rs += PetscAbsScalar(rtmp[*bcol]); 1212 bcol++; 1213 } 1214 } 1215 1216 sctx.rs = rs; 1217 sctx.pv = dk; 1218 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1219 if (newshift == 1) break; 1220 1221 /* copy data into U(k,:) */ 1222 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1223 jmin = bi[k]+1; jmax = bi[k+1]; 1224 if (jmin < jmax) { 1225 for (j=jmin; j<jmax; j++){ 1226 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1227 } 1228 /* add the k-th row into il and jl */ 1229 il[k] = jmin; 1230 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1231 } 1232 } 1233 } while (sctx.chshift); 1234 ierr = PetscFree(il);CHKERRQ(ierr); 1235 1236 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1237 C->factor = FACTOR_CHOLESKY; 1238 C->assembled = PETSC_TRUE; 1239 C->preallocated = PETSC_TRUE; 1240 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1241 if (sctx.nshift){ 1242 if (shiftnz) { 1243 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1244 } else if (shiftpd) { 1245 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1246 } 1247 } 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1253 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1254 { 1255 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1256 Mat_SeqSBAIJ *b; 1257 Mat B; 1258 PetscErrorCode ierr; 1259 PetscTruth perm_identity; 1260 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1261 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1262 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1263 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1264 PetscReal fill=info->fill,levels=info->levels; 1265 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1266 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1267 PetscBT lnkbt; 1268 1269 PetscFunctionBegin; 1270 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1271 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1272 1273 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1274 ui[0] = 0; 1275 1276 /* special case that simply copies fill pattern */ 1277 if (!levels && perm_identity) { 1278 ierr = MatMarkDiagonal_SeqAIJ(A);CHKERRQ(ierr); 1279 for (i=0; i<am; i++) { 1280 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1281 } 1282 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1283 cols = uj; 1284 for (i=0; i<am; i++) { 1285 aj = a->j + a->diag[i]; 1286 ncols = ui[i+1] - ui[i]; 1287 for (j=0; j<ncols; j++) *cols++ = *aj++; 1288 } 1289 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1290 /* initialization */ 1291 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1292 1293 /* jl: linked list for storing indices of the pivot rows 1294 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1295 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1296 il = jl + am; 1297 uj_ptr = (PetscInt**)(il + am); 1298 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1299 for (i=0; i<am; i++){ 1300 jl[i] = am; il[i] = 0; 1301 } 1302 1303 /* create and initialize a linked list for storing column indices of the active row k */ 1304 nlnk = am + 1; 1305 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1306 1307 /* initial FreeSpace size is fill*(ai[am]+1) */ 1308 if (fill < 1.0) fill = 1.0; 1309 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1310 current_space = free_space; 1311 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1312 current_space_lvl = free_space_lvl; 1313 1314 for (k=0; k<am; k++){ /* for each active row k */ 1315 /* initialize lnk by the column indices of row rip[k] of A */ 1316 nzk = 0; 1317 ncols = ai[rip[k]+1] - ai[rip[k]]; 1318 ncols_upper = 0; 1319 for (j=0; j<ncols; j++){ 1320 i = *(aj + ai[rip[k]] + j); 1321 if (rip[i] >= k){ /* only take upper triangular entry */ 1322 ajtmp[ncols_upper] = i; 1323 ncols_upper++; 1324 } 1325 } 1326 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1327 nzk += nlnk; 1328 1329 /* update lnk by computing fill-in for each pivot row to be merged in */ 1330 prow = jl[k]; /* 1st pivot row */ 1331 1332 while (prow < k){ 1333 nextprow = jl[prow]; 1334 1335 /* merge prow into k-th row */ 1336 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1337 jmax = ui[prow+1]; 1338 ncols = jmax-jmin; 1339 i = jmin - ui[prow]; 1340 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1341 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1342 j = *(uj - 1); 1343 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1344 nzk += nlnk; 1345 1346 /* update il and jl for prow */ 1347 if (jmin < jmax){ 1348 il[prow] = jmin; 1349 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1350 } 1351 prow = nextprow; 1352 } 1353 1354 /* if free space is not available, make more free space */ 1355 if (current_space->local_remaining<nzk) { 1356 i = am - k + 1; /* num of unfactored rows */ 1357 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1358 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1359 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1360 reallocs++; 1361 } 1362 1363 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1364 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1365 1366 /* add the k-th row into il and jl */ 1367 if (nzk > 1){ 1368 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1369 jl[k] = jl[i]; jl[i] = k; 1370 il[k] = ui[k] + 1; 1371 } 1372 uj_ptr[k] = current_space->array; 1373 uj_lvl_ptr[k] = current_space_lvl->array; 1374 1375 current_space->array += nzk; 1376 current_space->local_used += nzk; 1377 current_space->local_remaining -= nzk; 1378 1379 current_space_lvl->array += nzk; 1380 current_space_lvl->local_used += nzk; 1381 current_space_lvl->local_remaining -= nzk; 1382 1383 ui[k+1] = ui[k] + nzk; 1384 } 1385 1386 #if defined(PETSC_USE_INFO) 1387 if (ai[am] != 0) { 1388 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1389 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1390 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1391 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1392 } else { 1393 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1394 } 1395 #endif 1396 1397 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1398 ierr = PetscFree(jl);CHKERRQ(ierr); 1399 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1400 1401 /* destroy list of free space and other temporary array(s) */ 1402 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1403 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1404 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1405 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1406 1407 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1408 1409 /* put together the new matrix in MATSEQSBAIJ format */ 1410 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1411 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1412 B = *fact; 1413 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1414 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1415 1416 b = (Mat_SeqSBAIJ*)B->data; 1417 b->singlemalloc = PETSC_FALSE; 1418 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1419 b->j = uj; 1420 b->i = ui; 1421 b->diag = 0; 1422 b->ilen = 0; 1423 b->imax = 0; 1424 b->row = perm; 1425 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1426 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1427 b->icol = perm; 1428 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1429 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1430 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1431 b->maxnz = b->nz = ui[am]; 1432 b->freedata = PETSC_TRUE; 1433 1434 B->factor = FACTOR_CHOLESKY; 1435 B->info.factor_mallocs = reallocs; 1436 B->info.fill_ratio_given = fill; 1437 if (ai[am] != 0) { 1438 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1439 } else { 1440 B->info.fill_ratio_needed = 0.0; 1441 } 1442 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1443 if (perm_identity){ 1444 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1445 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1452 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1453 { 1454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1455 Mat_SeqSBAIJ *b; 1456 Mat B; 1457 PetscErrorCode ierr; 1458 PetscTruth perm_identity; 1459 PetscReal fill = info->fill; 1460 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1461 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1462 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1463 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1464 PetscBT lnkbt; 1465 IS iperm; 1466 1467 PetscFunctionBegin; 1468 /* check whether perm is the identity mapping */ 1469 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1470 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1471 1472 if (!perm_identity){ 1473 /* check if perm is symmetric! */ 1474 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1475 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1476 for (i=0; i<am; i++) { 1477 if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation"); 1478 } 1479 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1480 ierr = ISDestroy(iperm);CHKERRQ(ierr); 1481 } 1482 1483 /* initialization */ 1484 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1485 ui[0] = 0; 1486 1487 /* jl: linked list for storing indices of the pivot rows 1488 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1489 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1490 il = jl + am; 1491 cols = il + am; 1492 ui_ptr = (PetscInt**)(cols + am); 1493 for (i=0; i<am; i++){ 1494 jl[i] = am; il[i] = 0; 1495 } 1496 1497 /* create and initialize a linked list for storing column indices of the active row k */ 1498 nlnk = am + 1; 1499 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1500 1501 /* initial FreeSpace size is fill*(ai[am]+1) */ 1502 if (fill < 1.0) fill = 1.0; 1503 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1504 current_space = free_space; 1505 1506 for (k=0; k<am; k++){ /* for each active row k */ 1507 /* initialize lnk by the column indices of row rip[k] of A */ 1508 nzk = 0; 1509 ncols = ai[rip[k]+1] - ai[rip[k]]; 1510 ncols_upper = 0; 1511 for (j=0; j<ncols; j++){ 1512 i = rip[*(aj + ai[rip[k]] + j)]; 1513 if (i >= k){ /* only take upper triangular entry */ 1514 cols[ncols_upper] = i; 1515 ncols_upper++; 1516 } 1517 } 1518 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1519 nzk += nlnk; 1520 1521 /* update lnk by computing fill-in for each pivot row to be merged in */ 1522 prow = jl[k]; /* 1st pivot row */ 1523 1524 while (prow < k){ 1525 nextprow = jl[prow]; 1526 /* merge prow into k-th row */ 1527 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1528 jmax = ui[prow+1]; 1529 ncols = jmax-jmin; 1530 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1531 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1532 nzk += nlnk; 1533 1534 /* update il and jl for prow */ 1535 if (jmin < jmax){ 1536 il[prow] = jmin; 1537 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1538 } 1539 prow = nextprow; 1540 } 1541 1542 /* if free space is not available, make more free space */ 1543 if (current_space->local_remaining<nzk) { 1544 i = am - k + 1; /* num of unfactored rows */ 1545 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1546 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1547 reallocs++; 1548 } 1549 1550 /* copy data into free space, then initialize lnk */ 1551 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1552 1553 /* add the k-th row into il and jl */ 1554 if (nzk-1 > 0){ 1555 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1556 jl[k] = jl[i]; jl[i] = k; 1557 il[k] = ui[k] + 1; 1558 } 1559 ui_ptr[k] = current_space->array; 1560 current_space->array += nzk; 1561 current_space->local_used += nzk; 1562 current_space->local_remaining -= nzk; 1563 1564 ui[k+1] = ui[k] + nzk; 1565 } 1566 1567 #if defined(PETSC_USE_INFO) 1568 if (ai[am] != 0) { 1569 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1570 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1571 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1572 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1573 } else { 1574 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1575 } 1576 #endif 1577 1578 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1579 ierr = PetscFree(jl);CHKERRQ(ierr); 1580 1581 /* destroy list of free space and other temporary array(s) */ 1582 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1583 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1584 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1585 1586 /* put together the new matrix in MATSEQSBAIJ format */ 1587 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1588 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1589 B = *fact; 1590 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1591 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1592 1593 b = (Mat_SeqSBAIJ*)B->data; 1594 b->singlemalloc = PETSC_FALSE; 1595 b->freedata = PETSC_TRUE; 1596 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1597 b->j = uj; 1598 b->i = ui; 1599 b->diag = 0; 1600 b->ilen = 0; 1601 b->imax = 0; 1602 b->row = perm; 1603 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1604 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1605 b->icol = perm; 1606 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1607 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1608 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1609 b->maxnz = b->nz = ui[am]; 1610 1611 B->factor = FACTOR_CHOLESKY; 1612 B->info.factor_mallocs = reallocs; 1613 B->info.fill_ratio_given = fill; 1614 if (ai[am] != 0) { 1615 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1616 } else { 1617 B->info.fill_ratio_needed = 0.0; 1618 } 1619 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1620 if (perm_identity){ 1621 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1622 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1623 } 1624 PetscFunctionReturn(0); 1625 } 1626