1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscScalar d; 443 PetscReal rs; 444 LUShift_Ctx sctx; 445 PetscInt newshift,*ddiag; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp; ics = ic; 453 454 sctx.shift_top = 0; 455 sctx.nshift_max = 0; 456 sctx.shift_lo = 0; 457 sctx.shift_hi = 0; 458 459 /* if both shift schemes are chosen by user, only use info->shiftpd */ 460 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 461 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 462 PetscInt *aai = a->i; 463 ddiag = a->diag; 464 sctx.shift_top = 0; 465 for (i=0; i<n; i++) { 466 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 467 d = (a->a)[ddiag[i]]; 468 rs = -PetscAbsScalar(d) - PetscRealPart(d); 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 rs += PetscAbsScalar(v[j]); 473 if (rs>sctx.shift_top) sctx.shift_top = rs; 474 } 475 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 sctx.shift_amount = 0; 483 sctx.nshift = 0; 484 do { 485 sctx.lushift = PETSC_FALSE; 486 for (i=0; i<n; i++){ 487 nz = bi[i+1] - bi[i]; 488 bjtmp = bj + bi[i]; 489 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = a->i[r[i]+1] - a->i[r[i]]; 493 ajtmp = a->j + a->i[r[i]]; 494 v = a->a + a->i[r[i]]; 495 for (j=0; j<nz; j++) { 496 rtmp[ics[ajtmp[j]]] = v[j]; 497 } 498 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 row = *bjtmp++; 501 while (row < i) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row]; 505 pj = b->j + diag_offset[row] + 1; 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = bi[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 511 } 512 row = *bjtmp++; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + bi[i] ; 516 pj = b->j + bi[i] ; 517 nz = bi[i+1] - bi[i]; 518 diag = diag_offset[i] - bi[i]; 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 526 sctx.rs = rs; 527 sctx.pv = pv[diag]; 528 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 529 if (newshift == 1) break; 530 } 531 532 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 557 if (sctx.nshift){ 558 if (info->shiftnz) { 559 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 560 } else if (info->shiftpd) { 561 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 This routine implements inplace ILU(0) with row or/and column permutations. 569 Input: 570 A - original matrix 571 Output; 572 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 573 a->j (col index) is permuted by the inverse of colperm, then sorted 574 a->a reordered accordingly with a->j 575 a->diag (ptr to diagonal elements) is updated. 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 579 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 580 { 581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 582 IS isrow = a->row,isicol = a->icol; 583 PetscErrorCode ierr; 584 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 585 PetscInt *ajtmp,nz,row,*ics; 586 PetscInt *diag = a->diag,nbdiag,*pj; 587 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 588 PetscReal rs; 589 LUShift_Ctx sctx; 590 PetscInt newshift; 591 592 PetscFunctionBegin; 593 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 594 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 595 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 597 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 598 ics = ic; 599 600 sctx.shift_top = 0; 601 sctx.nshift_max = 0; 602 sctx.shift_lo = 0; 603 sctx.shift_hi = 0; 604 605 /* if both shift schemes are chosen by user, only use info->shiftpd */ 606 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 607 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 608 sctx.shift_top = 0; 609 for (i=0; i<n; i++) { 610 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 611 d = (a->a)[diag[i]]; 612 rs = -PetscAbsScalar(d) - PetscRealPart(d); 613 v = a->a+ai[i]; 614 nz = ai[i+1] - ai[i]; 615 for (j=0; j<nz; j++) 616 rs += PetscAbsScalar(v[j]); 617 if (rs>sctx.shift_top) sctx.shift_top = rs; 618 } 619 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 620 sctx.shift_top *= 1.1; 621 sctx.nshift_max = 5; 622 sctx.shift_lo = 0.; 623 sctx.shift_hi = 1.; 624 } 625 626 sctx.shift_amount = 0; 627 sctx.nshift = 0; 628 do { 629 sctx.lushift = PETSC_FALSE; 630 for (i=0; i<n; i++){ 631 /* load in initial unfactored row */ 632 nz = ai[r[i]+1] - ai[r[i]]; 633 ajtmp = aj + ai[r[i]]; 634 v = a->a + ai[r[i]]; 635 /* sort permuted ajtmp and values v accordingly */ 636 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 637 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 638 639 diag[r[i]] = ai[r[i]]; 640 for (j=0; j<nz; j++) { 641 rtmp[ajtmp[j]] = v[j]; 642 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 643 } 644 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 645 646 row = *ajtmp++; 647 while (row < i) { 648 pc = rtmp + row; 649 if (*pc != 0.0) { 650 pv = a->a + diag[r[row]]; 651 pj = aj + diag[r[row]] + 1; 652 653 multiplier = *pc / *pv++; 654 *pc = multiplier; 655 nz = ai[r[row]+1] - diag[r[row]] - 1; 656 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 657 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 658 } 659 row = *ajtmp++; 660 } 661 /* finished row so overwrite it onto a->a */ 662 pv = a->a + ai[r[i]] ; 663 pj = aj + ai[r[i]] ; 664 nz = ai[r[i]+1] - ai[r[i]]; 665 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 666 667 rs = 0.0; 668 for (j=0; j<nz; j++) { 669 pv[j] = rtmp[pj[j]]; 670 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 671 } 672 673 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 674 sctx.rs = rs; 675 sctx.pv = pv[nbdiag]; 676 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 677 if (newshift == 1) break; 678 } 679 680 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 681 /* 682 * if no shift in this attempt & shifting & started shifting & can refine, 683 * then try lower shift 684 */ 685 sctx.shift_hi = info->shift_fraction; 686 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 687 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 688 sctx.lushift = PETSC_TRUE; 689 sctx.nshift++; 690 } 691 } while (sctx.lushift); 692 693 /* invert diagonal entries for simplier triangular solves */ 694 for (i=0; i<n; i++) { 695 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 696 } 697 698 ierr = PetscFree(rtmp);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 701 A->factor = FACTOR_LU; 702 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 703 A->assembled = PETSC_TRUE; 704 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 705 if (sctx.nshift){ 706 if (info->shiftnz) { 707 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 708 } else if (info->shiftpd) { 709 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 710 } 711 } 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 717 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 718 { 719 PetscFunctionBegin; 720 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 721 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 726 /* ----------------------------------------------------------- */ 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatLUFactor_SeqAIJ" 729 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 730 { 731 PetscErrorCode ierr; 732 Mat C; 733 734 PetscFunctionBegin; 735 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 736 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 737 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 /* ----------------------------------------------------------- */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatSolve_SeqAIJ" 744 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 745 { 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 747 IS iscol = a->col,isrow = a->row; 748 PetscErrorCode ierr; 749 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 750 PetscInt nz,*rout,*cout; 751 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 752 753 PetscFunctionBegin; 754 if (!n) PetscFunctionReturn(0); 755 756 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 757 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 758 tmp = a->solve_work; 759 760 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 761 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 762 763 /* forward solve the lower triangular */ 764 tmp[0] = b[*r++]; 765 tmps = tmp; 766 for (i=1; i<n; i++) { 767 v = aa + ai[i] ; 768 vi = aj + ai[i] ; 769 nz = a->diag[i] - ai[i]; 770 sum = b[*r++]; 771 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 772 tmp[i] = sum; 773 } 774 775 /* backward solve the upper triangular */ 776 for (i=n-1; i>=0; i--){ 777 v = aa + a->diag[i] + 1; 778 vi = aj + a->diag[i] + 1; 779 nz = ai[i+1] - a->diag[i] - 1; 780 sum = tmp[i]; 781 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 782 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 783 } 784 785 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 786 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 787 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatMatSolve_SeqAIJ" 795 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,neq; 802 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 803 804 PetscFunctionBegin; 805 if (!n) PetscFunctionReturn(0); 806 807 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 808 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 809 810 tmp = a->solve_work; 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 for (neq=0; neq<n; neq++){ 815 /* forward solve the lower triangular */ 816 tmp[0] = b[r[0]]; 817 tmps = tmp; 818 for (i=1; i<n; i++) { 819 v = aa + ai[i] ; 820 vi = aj + ai[i] ; 821 nz = a->diag[i] - ai[i]; 822 sum = b[r[i]]; 823 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 824 tmp[i] = sum; 825 } 826 /* backward solve the upper triangular */ 827 for (i=n-1; i>=0; i--){ 828 v = aa + a->diag[i] + 1; 829 vi = aj + a->diag[i] + 1; 830 nz = ai[i+1] - a->diag[i] - 1; 831 sum = tmp[i]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 834 } 835 836 b += n; 837 x += n; 838 } 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 842 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 843 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 849 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 852 IS iscol = a->col,isrow = a->row; 853 PetscErrorCode ierr; 854 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 855 PetscInt nz,*rout,*cout,row; 856 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 857 858 PetscFunctionBegin; 859 if (!n) PetscFunctionReturn(0); 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 867 868 /* forward solve the lower triangular */ 869 tmp[0] = b[*r++]; 870 tmps = tmp; 871 for (row=1; row<n; row++) { 872 i = rout[row]; /* permuted row */ 873 v = aa + ai[i] ; 874 vi = aj + ai[i] ; 875 nz = a->diag[i] - ai[i]; 876 sum = b[*r++]; 877 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 878 tmp[row] = sum; 879 } 880 881 /* backward solve the upper triangular */ 882 for (row=n-1; row>=0; row--){ 883 i = rout[row]; /* permuted row */ 884 v = aa + a->diag[i] + 1; 885 vi = aj + a->diag[i] + 1; 886 nz = ai[i+1] - a->diag[i] - 1; 887 sum = tmp[row]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 890 } 891 892 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 894 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 /* ----------------------------------------------------------- */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 908 PetscScalar *x,*b,*aa = a->a; 909 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 910 PetscInt adiag_i,i,*vi,nz,ai_i; 911 PetscScalar *v,sum; 912 #endif 913 914 PetscFunctionBegin; 915 if (!n) PetscFunctionReturn(0); 916 917 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 920 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 921 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 922 #else 923 /* forward solve the lower triangular */ 924 x[0] = b[0]; 925 for (i=1; i<n; i++) { 926 ai_i = ai[i]; 927 v = aa + ai_i; 928 vi = aj + ai_i; 929 nz = adiag[i] - ai_i; 930 sum = b[i]; 931 while (nz--) sum -= *v++ * x[*vi++]; 932 x[i] = sum; 933 } 934 935 /* backward solve the upper triangular */ 936 for (i=n-1; i>=0; i--){ 937 adiag_i = adiag[i]; 938 v = aa + adiag_i + 1; 939 vi = aj + adiag_i + 1; 940 nz = ai[i+1] - adiag_i - 1; 941 sum = x[i]; 942 while (nz--) sum -= *v++ * x[*vi++]; 943 x[i] = sum*aa[adiag_i]; 944 } 945 #endif 946 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 954 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 955 { 956 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 957 IS iscol = a->col,isrow = a->row; 958 PetscErrorCode ierr; 959 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 960 PetscInt nz,*rout,*cout; 961 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 962 963 PetscFunctionBegin; 964 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 965 966 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 968 tmp = a->solve_work; 969 970 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 971 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 972 973 /* forward solve the lower triangular */ 974 tmp[0] = b[*r++]; 975 for (i=1; i<n; i++) { 976 v = aa + ai[i] ; 977 vi = aj + ai[i] ; 978 nz = a->diag[i] - ai[i]; 979 sum = b[*r++]; 980 while (nz--) sum -= *v++ * tmp[*vi++ ]; 981 tmp[i] = sum; 982 } 983 984 /* backward solve the upper triangular */ 985 for (i=n-1; i>=0; i--){ 986 v = aa + a->diag[i] + 1; 987 vi = aj + a->diag[i] + 1; 988 nz = ai[i+1] - a->diag[i] - 1; 989 sum = tmp[i]; 990 while (nz--) sum -= *v++ * tmp[*vi++ ]; 991 tmp[i] = sum*aa[a->diag[i]]; 992 x[*c--] += tmp[i]; 993 } 994 995 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 997 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1000 1001 PetscFunctionReturn(0); 1002 } 1003 /* -------------------------------------------------------------------*/ 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1006 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1007 { 1008 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1009 IS iscol = a->col,isrow = a->row; 1010 PetscErrorCode ierr; 1011 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1012 PetscInt nz,*rout,*cout,*diag = a->diag; 1013 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1014 1015 PetscFunctionBegin; 1016 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1018 tmp = a->solve_work; 1019 1020 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1021 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1022 1023 /* copy the b into temp work space according to permutation */ 1024 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1025 1026 /* forward solve the U^T */ 1027 for (i=0; i<n; i++) { 1028 v = aa + diag[i] ; 1029 vi = aj + diag[i] + 1; 1030 nz = ai[i+1] - diag[i] - 1; 1031 s1 = tmp[i]; 1032 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1033 while (nz--) { 1034 tmp[*vi++ ] -= (*v++)*s1; 1035 } 1036 tmp[i] = s1; 1037 } 1038 1039 /* backward solve the L^T */ 1040 for (i=n-1; i>=0; i--){ 1041 v = aa + diag[i] - 1 ; 1042 vi = aj + diag[i] - 1 ; 1043 nz = diag[i] - ai[i]; 1044 s1 = tmp[i]; 1045 while (nz--) { 1046 tmp[*vi-- ] -= (*v--)*s1; 1047 } 1048 } 1049 1050 /* copy tmp into x according to permutation */ 1051 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 1058 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1064 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1072 1073 PetscFunctionBegin; 1074 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1075 1076 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1077 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1078 tmp = a->solve_work; 1079 1080 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1081 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1082 1083 /* copy the b into temp work space according to permutation */ 1084 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1085 1086 /* forward solve the U^T */ 1087 for (i=0; i<n; i++) { 1088 v = aa + diag[i] ; 1089 vi = aj + diag[i] + 1; 1090 nz = ai[i+1] - diag[i] - 1; 1091 tmp[i] *= *v++; 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*tmp[i]; 1094 } 1095 } 1096 1097 /* backward solve the L^T */ 1098 for (i=n-1; i>=0; i--){ 1099 v = aa + diag[i] - 1 ; 1100 vi = aj + diag[i] - 1 ; 1101 nz = diag[i] - ai[i]; 1102 while (nz--) { 1103 tmp[*vi-- ] -= (*v--)*tmp[i]; 1104 } 1105 } 1106 1107 /* copy tmp into x according to permutation */ 1108 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1109 1110 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1111 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1112 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 1115 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 /* ----------------------------------------------------------------*/ 1119 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1123 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1126 IS isicol; 1127 PetscErrorCode ierr; 1128 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1129 PetscInt *bi,*cols,nnz,*cols_lvl; 1130 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1131 PetscInt i,levels,diagonal_fill; 1132 PetscTruth col_identity,row_identity; 1133 PetscReal f; 1134 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1135 PetscBT lnkbt; 1136 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1137 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1138 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1139 PetscTruth missing; 1140 1141 PetscFunctionBegin; 1142 f = info->fill; 1143 levels = (PetscInt)info->levels; 1144 diagonal_fill = (PetscInt)info->diagonal_fill; 1145 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1146 1147 /* special case that simply copies fill pattern */ 1148 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1149 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1150 if (!levels && row_identity && col_identity) { 1151 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1152 (*fact)->factor = FACTOR_LU; 1153 (*fact)->info.factor_mallocs = 0; 1154 (*fact)->info.fill_ratio_given = info->fill; 1155 (*fact)->info.fill_ratio_needed = 1.0; 1156 b = (Mat_SeqAIJ*)(*fact)->data; 1157 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 1158 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1159 b->row = isrow; 1160 b->col = iscol; 1161 b->icol = isicol; 1162 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1163 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1164 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1165 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1166 PetscFunctionReturn(0); 1167 } 1168 1169 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1170 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1171 1172 /* get new row pointers */ 1173 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1174 bi[0] = 0; 1175 /* bdiag is location of diagonal in factor */ 1176 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1177 bdiag[0] = 0; 1178 1179 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1180 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1181 1182 /* create a linked list for storing column indices of the active row */ 1183 nlnk = n + 1; 1184 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1185 1186 /* initial FreeSpace size is f*(ai[n]+1) */ 1187 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1188 current_space = free_space; 1189 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1190 current_space_lvl = free_space_lvl; 1191 1192 for (i=0; i<n; i++) { 1193 nzi = 0; 1194 /* copy current row into linked list */ 1195 nnz = ai[r[i]+1] - ai[r[i]]; 1196 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1197 cols = aj + ai[r[i]]; 1198 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1199 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1200 nzi += nlnk; 1201 1202 /* make sure diagonal entry is included */ 1203 if (diagonal_fill && lnk[i] == -1) { 1204 fm = n; 1205 while (lnk[fm] < i) fm = lnk[fm]; 1206 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1207 lnk[fm] = i; 1208 lnk_lvl[i] = 0; 1209 nzi++; dcount++; 1210 } 1211 1212 /* add pivot rows into the active row */ 1213 nzbd = 0; 1214 prow = lnk[n]; 1215 while (prow < i) { 1216 nnz = bdiag[prow]; 1217 cols = bj_ptr[prow] + nnz + 1; 1218 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1219 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1220 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1221 nzi += nlnk; 1222 prow = lnk[prow]; 1223 nzbd++; 1224 } 1225 bdiag[i] = nzbd; 1226 bi[i+1] = bi[i] + nzi; 1227 1228 /* if free space is not available, make more free space */ 1229 if (current_space->local_remaining<nzi) { 1230 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1231 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1232 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1233 reallocs++; 1234 } 1235 1236 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1237 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1238 bj_ptr[i] = current_space->array; 1239 bjlvl_ptr[i] = current_space_lvl->array; 1240 1241 /* make sure the active row i has diagonal entry */ 1242 if (*(bj_ptr[i]+bdiag[i]) != i) { 1243 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1244 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1245 } 1246 1247 current_space->array += nzi; 1248 current_space->local_used += nzi; 1249 current_space->local_remaining -= nzi; 1250 current_space_lvl->array += nzi; 1251 current_space_lvl->local_used += nzi; 1252 current_space_lvl->local_remaining -= nzi; 1253 } 1254 1255 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1256 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1257 1258 /* destroy list of free space and other temporary arrays */ 1259 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1260 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1261 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1262 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1263 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1264 1265 #if defined(PETSC_USE_INFO) 1266 { 1267 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1268 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1269 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1270 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1271 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1272 if (diagonal_fill) { 1273 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1274 } 1275 } 1276 #endif 1277 1278 /* put together the new matrix */ 1279 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1280 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1281 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1282 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1283 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1284 b = (Mat_SeqAIJ*)(*fact)->data; 1285 b->free_a = PETSC_TRUE; 1286 b->free_ij = PETSC_TRUE; 1287 b->singlemalloc = PETSC_FALSE; 1288 len = (bi[n] )*sizeof(PetscScalar); 1289 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1290 b->j = bj; 1291 b->i = bi; 1292 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1293 b->diag = bdiag; 1294 b->ilen = 0; 1295 b->imax = 0; 1296 b->row = isrow; 1297 b->col = iscol; 1298 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1299 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1300 b->icol = isicol; 1301 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1302 /* In b structure: Free imax, ilen, old a, old j. 1303 Allocate bdiag, solve_work, new a, new j */ 1304 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1305 b->maxnz = b->nz = bi[n] ; 1306 (*fact)->factor = FACTOR_LU; 1307 (*fact)->info.factor_mallocs = reallocs; 1308 (*fact)->info.fill_ratio_given = f; 1309 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1310 1311 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1312 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1313 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #include "src/mat/impls/sbaij/seq/sbaij.h" 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1320 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1321 { 1322 Mat C = *B; 1323 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1324 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1325 IS ip=b->row,iip = b->icol; 1326 PetscErrorCode ierr; 1327 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1328 PetscInt *ai=a->i,*aj=a->j; 1329 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1330 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1331 PetscReal zeropivot,rs,shiftnz; 1332 PetscReal shiftpd; 1333 ChShift_Ctx sctx; 1334 PetscInt newshift; 1335 1336 PetscFunctionBegin; 1337 1338 shiftnz = info->shiftnz; 1339 shiftpd = info->shiftpd; 1340 zeropivot = info->zeropivot; 1341 1342 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1343 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1344 1345 /* initialization */ 1346 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1347 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1348 jl = il + mbs; 1349 rtmp = (MatScalar*)(jl + mbs); 1350 1351 sctx.shift_amount = 0; 1352 sctx.nshift = 0; 1353 do { 1354 sctx.chshift = PETSC_FALSE; 1355 for (i=0; i<mbs; i++) { 1356 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1357 } 1358 1359 for (k = 0; k<mbs; k++){ 1360 bval = ba + bi[k]; 1361 /* initialize k-th row by the perm[k]-th row of A */ 1362 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1363 for (j = jmin; j < jmax; j++){ 1364 col = riip[aj[j]]; 1365 if (col >= k){ /* only take upper triangular entry */ 1366 rtmp[col] = aa[j]; 1367 *bval++ = 0.0; /* for in-place factorization */ 1368 } 1369 } 1370 /* shift the diagonal of the matrix */ 1371 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1372 1373 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1374 dk = rtmp[k]; 1375 i = jl[k]; /* first row to be added to k_th row */ 1376 1377 while (i < k){ 1378 nexti = jl[i]; /* next row to be added to k_th row */ 1379 1380 /* compute multiplier, update diag(k) and U(i,k) */ 1381 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1382 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1383 dk += uikdi*ba[ili]; 1384 ba[ili] = uikdi; /* -U(i,k) */ 1385 1386 /* add multiple of row i to k-th row */ 1387 jmin = ili + 1; jmax = bi[i+1]; 1388 if (jmin < jmax){ 1389 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1390 /* update il and jl for row i */ 1391 il[i] = jmin; 1392 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1393 } 1394 i = nexti; 1395 } 1396 1397 /* shift the diagonals when zero pivot is detected */ 1398 /* compute rs=sum of abs(off-diagonal) */ 1399 rs = 0.0; 1400 jmin = bi[k]+1; 1401 nz = bi[k+1] - jmin; 1402 bcol = bj + jmin; 1403 while (nz--){ 1404 rs += PetscAbsScalar(rtmp[*bcol]); 1405 bcol++; 1406 } 1407 1408 sctx.rs = rs; 1409 sctx.pv = dk; 1410 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1411 1412 if (newshift == 1) { 1413 if (!sctx.shift_amount) { 1414 sctx.shift_amount = 1e-5; 1415 } 1416 break; 1417 } 1418 1419 /* copy data into U(k,:) */ 1420 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1421 jmin = bi[k]+1; jmax = bi[k+1]; 1422 if (jmin < jmax) { 1423 for (j=jmin; j<jmax; j++){ 1424 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1425 } 1426 /* add the k-th row into il and jl */ 1427 il[k] = jmin; 1428 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1429 } 1430 } 1431 } while (sctx.chshift); 1432 ierr = PetscFree(il);CHKERRQ(ierr); 1433 1434 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1435 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1436 C->factor = FACTOR_CHOLESKY; 1437 C->assembled = PETSC_TRUE; 1438 C->preallocated = PETSC_TRUE; 1439 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1440 if (sctx.nshift){ 1441 if (shiftnz) { 1442 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1443 } else if (shiftpd) { 1444 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1445 } 1446 } 1447 PetscFunctionReturn(0); 1448 } 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1452 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1453 { 1454 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1455 Mat_SeqSBAIJ *b; 1456 Mat B; 1457 PetscErrorCode ierr; 1458 PetscTruth perm_identity; 1459 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1460 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1461 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1462 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1463 PetscReal fill=info->fill,levels=info->levels; 1464 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1465 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1466 PetscBT lnkbt; 1467 IS iperm; 1468 1469 PetscFunctionBegin; 1470 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1471 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1472 1473 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1474 ui[0] = 0; 1475 1476 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1477 if (!levels && perm_identity) { 1478 for (i=0; i<am; i++) { 1479 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1480 } 1481 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1482 cols = uj; 1483 for (i=0; i<am; i++) { 1484 aj = a->j + a->diag[i]; 1485 ncols = ui[i+1] - ui[i]; 1486 for (j=0; j<ncols; j++) *cols++ = *aj++; 1487 } 1488 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1489 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1490 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1491 1492 /* initialization */ 1493 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1494 1495 /* jl: linked list for storing indices of the pivot rows 1496 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1497 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1498 il = jl + am; 1499 uj_ptr = (PetscInt**)(il + am); 1500 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1501 for (i=0; i<am; i++){ 1502 jl[i] = am; il[i] = 0; 1503 } 1504 1505 /* create and initialize a linked list for storing column indices of the active row k */ 1506 nlnk = am + 1; 1507 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1508 1509 /* initial FreeSpace size is fill*(ai[am]+1) */ 1510 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1511 current_space = free_space; 1512 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1513 current_space_lvl = free_space_lvl; 1514 1515 for (k=0; k<am; k++){ /* for each active row k */ 1516 /* initialize lnk by the column indices of row rip[k] of A */ 1517 nzk = 0; 1518 ncols = ai[rip[k]+1] - ai[rip[k]]; 1519 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1520 ncols_upper = 0; 1521 for (j=0; j<ncols; j++){ 1522 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1523 if (riip[i] >= k){ /* only take upper triangular entry */ 1524 ajtmp[ncols_upper] = i; 1525 ncols_upper++; 1526 } 1527 } 1528 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1529 nzk += nlnk; 1530 1531 /* update lnk by computing fill-in for each pivot row to be merged in */ 1532 prow = jl[k]; /* 1st pivot row */ 1533 1534 while (prow < k){ 1535 nextprow = jl[prow]; 1536 1537 /* merge prow into k-th row */ 1538 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1539 jmax = ui[prow+1]; 1540 ncols = jmax-jmin; 1541 i = jmin - ui[prow]; 1542 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1543 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1544 j = *(uj - 1); 1545 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1546 nzk += nlnk; 1547 1548 /* update il and jl for prow */ 1549 if (jmin < jmax){ 1550 il[prow] = jmin; 1551 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1552 } 1553 prow = nextprow; 1554 } 1555 1556 /* if free space is not available, make more free space */ 1557 if (current_space->local_remaining<nzk) { 1558 i = am - k + 1; /* num of unfactored rows */ 1559 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1560 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1561 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1562 reallocs++; 1563 } 1564 1565 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1566 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1567 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1568 1569 /* add the k-th row into il and jl */ 1570 if (nzk > 1){ 1571 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1572 jl[k] = jl[i]; jl[i] = k; 1573 il[k] = ui[k] + 1; 1574 } 1575 uj_ptr[k] = current_space->array; 1576 uj_lvl_ptr[k] = current_space_lvl->array; 1577 1578 current_space->array += nzk; 1579 current_space->local_used += nzk; 1580 current_space->local_remaining -= nzk; 1581 1582 current_space_lvl->array += nzk; 1583 current_space_lvl->local_used += nzk; 1584 current_space_lvl->local_remaining -= nzk; 1585 1586 ui[k+1] = ui[k] + nzk; 1587 } 1588 1589 #if defined(PETSC_USE_INFO) 1590 if (ai[am] != 0) { 1591 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1592 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1593 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1594 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1595 } else { 1596 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1597 } 1598 #endif 1599 1600 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1601 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1602 ierr = PetscFree(jl);CHKERRQ(ierr); 1603 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1604 1605 /* destroy list of free space and other temporary array(s) */ 1606 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1607 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1608 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1609 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1610 1611 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1612 1613 /* put together the new matrix in MATSEQSBAIJ format */ 1614 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1615 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1616 B = *fact; 1617 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1618 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1619 1620 b = (Mat_SeqSBAIJ*)B->data; 1621 b->singlemalloc = PETSC_FALSE; 1622 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1623 b->j = uj; 1624 b->i = ui; 1625 b->diag = 0; 1626 b->ilen = 0; 1627 b->imax = 0; 1628 b->row = perm; 1629 b->col = perm; 1630 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1631 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1632 b->icol = iperm; 1633 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1634 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1635 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1636 b->maxnz = b->nz = ui[am]; 1637 b->free_a = PETSC_TRUE; 1638 b->free_ij = PETSC_TRUE; 1639 1640 B->factor = FACTOR_CHOLESKY; 1641 B->info.factor_mallocs = reallocs; 1642 B->info.fill_ratio_given = fill; 1643 if (ai[am] != 0) { 1644 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1645 } else { 1646 B->info.fill_ratio_needed = 0.0; 1647 } 1648 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1649 if (perm_identity){ 1650 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1651 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1652 } 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1658 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1659 { 1660 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1661 Mat_SeqSBAIJ *b; 1662 Mat B; 1663 PetscErrorCode ierr; 1664 PetscTruth perm_identity; 1665 PetscReal fill = info->fill; 1666 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1667 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1668 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1669 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1670 PetscBT lnkbt; 1671 IS iperm; 1672 1673 PetscFunctionBegin; 1674 /* check whether perm is the identity mapping */ 1675 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1676 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1677 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1678 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1679 1680 /* initialization */ 1681 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1682 ui[0] = 0; 1683 1684 /* jl: linked list for storing indices of the pivot rows 1685 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1686 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1687 il = jl + am; 1688 cols = il + am; 1689 ui_ptr = (PetscInt**)(cols + am); 1690 for (i=0; i<am; i++){ 1691 jl[i] = am; il[i] = 0; 1692 } 1693 1694 /* create and initialize a linked list for storing column indices of the active row k */ 1695 nlnk = am + 1; 1696 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1697 1698 /* initial FreeSpace size is fill*(ai[am]+1) */ 1699 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1700 current_space = free_space; 1701 1702 for (k=0; k<am; k++){ /* for each active row k */ 1703 /* initialize lnk by the column indices of row rip[k] of A */ 1704 nzk = 0; 1705 ncols = ai[rip[k]+1] - ai[rip[k]]; 1706 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1707 ncols_upper = 0; 1708 for (j=0; j<ncols; j++){ 1709 i = riip[*(aj + ai[rip[k]] + j)]; 1710 if (i >= k){ /* only take upper triangular entry */ 1711 cols[ncols_upper] = i; 1712 ncols_upper++; 1713 } 1714 } 1715 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1716 nzk += nlnk; 1717 1718 /* update lnk by computing fill-in for each pivot row to be merged in */ 1719 prow = jl[k]; /* 1st pivot row */ 1720 1721 while (prow < k){ 1722 nextprow = jl[prow]; 1723 /* merge prow into k-th row */ 1724 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1725 jmax = ui[prow+1]; 1726 ncols = jmax-jmin; 1727 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1728 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1729 nzk += nlnk; 1730 1731 /* update il and jl for prow */ 1732 if (jmin < jmax){ 1733 il[prow] = jmin; 1734 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1735 } 1736 prow = nextprow; 1737 } 1738 1739 /* if free space is not available, make more free space */ 1740 if (current_space->local_remaining<nzk) { 1741 i = am - k + 1; /* num of unfactored rows */ 1742 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1743 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1744 reallocs++; 1745 } 1746 1747 /* copy data into free space, then initialize lnk */ 1748 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1749 1750 /* add the k-th row into il and jl */ 1751 if (nzk-1 > 0){ 1752 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1753 jl[k] = jl[i]; jl[i] = k; 1754 il[k] = ui[k] + 1; 1755 } 1756 ui_ptr[k] = current_space->array; 1757 current_space->array += nzk; 1758 current_space->local_used += nzk; 1759 current_space->local_remaining -= nzk; 1760 1761 ui[k+1] = ui[k] + nzk; 1762 } 1763 1764 #if defined(PETSC_USE_INFO) 1765 if (ai[am] != 0) { 1766 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1767 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1768 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1769 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1770 } else { 1771 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1772 } 1773 #endif 1774 1775 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1776 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1777 ierr = PetscFree(jl);CHKERRQ(ierr); 1778 1779 /* destroy list of free space and other temporary array(s) */ 1780 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1781 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1782 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1783 1784 /* put together the new matrix in MATSEQSBAIJ format */ 1785 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1786 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1787 B = *fact; 1788 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1789 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1790 1791 b = (Mat_SeqSBAIJ*)B->data; 1792 b->singlemalloc = PETSC_FALSE; 1793 b->free_a = PETSC_TRUE; 1794 b->free_ij = PETSC_TRUE; 1795 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1796 b->j = uj; 1797 b->i = ui; 1798 b->diag = 0; 1799 b->ilen = 0; 1800 b->imax = 0; 1801 b->row = perm; 1802 b->col = perm; 1803 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1804 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1805 b->icol = iperm; 1806 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1807 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1808 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1809 b->maxnz = b->nz = ui[am]; 1810 1811 B->factor = FACTOR_CHOLESKY; 1812 B->info.factor_mallocs = reallocs; 1813 B->info.fill_ratio_given = fill; 1814 if (ai[am] != 0) { 1815 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1816 } else { 1817 B->info.fill_ratio_needed = 0.0; 1818 } 1819 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1820 if (perm_identity){ 1821 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1822 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1823 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1824 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1825 } else { 1826 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1827 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1828 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1829 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1830 } 1831 PetscFunctionReturn(0); 1832 } 1833