1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*pc,multiplier,*rtmps; 442 MatScalar *v,*pv; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 /* 569 This routine implements inplace ILU(0) with row or/and column permutations. 570 Input: 571 A - original matrix 572 Output; 573 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 574 a->j (col index) is permuted by the inverse of colperm, then sorted 575 a->a reordered accordingly with a->j 576 a->diag (ptr to diagonal elements) is updated. 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 581 { 582 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 583 IS isrow = a->row,isicol = a->icol; 584 PetscErrorCode ierr; 585 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 586 PetscInt *ajtmp,nz,row,*ics; 587 PetscInt *diag = a->diag,nbdiag,*pj; 588 PetscScalar *rtmp,*pc,multiplier,d; 589 MatScalar *v,*pv; 590 PetscReal rs; 591 LUShift_Ctx sctx; 592 PetscInt newshift; 593 594 PetscFunctionBegin; 595 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 596 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 597 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 598 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 599 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 600 ics = ic; 601 602 sctx.shift_top = 0; 603 sctx.nshift_max = 0; 604 sctx.shift_lo = 0; 605 sctx.shift_hi = 0; 606 607 /* if both shift schemes are chosen by user, only use info->shiftpd */ 608 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 609 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 610 sctx.shift_top = 0; 611 for (i=0; i<n; i++) { 612 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 613 d = (a->a)[diag[i]]; 614 rs = -PetscAbsScalar(d) - PetscRealPart(d); 615 v = a->a+ai[i]; 616 nz = ai[i+1] - ai[i]; 617 for (j=0; j<nz; j++) 618 rs += PetscAbsScalar(v[j]); 619 if (rs>sctx.shift_top) sctx.shift_top = rs; 620 } 621 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 622 sctx.shift_top *= 1.1; 623 sctx.nshift_max = 5; 624 sctx.shift_lo = 0.; 625 sctx.shift_hi = 1.; 626 } 627 628 sctx.shift_amount = 0; 629 sctx.nshift = 0; 630 do { 631 sctx.lushift = PETSC_FALSE; 632 for (i=0; i<n; i++){ 633 /* load in initial unfactored row */ 634 nz = ai[r[i]+1] - ai[r[i]]; 635 ajtmp = aj + ai[r[i]]; 636 v = a->a + ai[r[i]]; 637 /* sort permuted ajtmp and values v accordingly */ 638 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 639 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 640 641 diag[r[i]] = ai[r[i]]; 642 for (j=0; j<nz; j++) { 643 rtmp[ajtmp[j]] = v[j]; 644 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 645 } 646 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 647 648 row = *ajtmp++; 649 while (row < i) { 650 pc = rtmp + row; 651 if (*pc != 0.0) { 652 pv = a->a + diag[r[row]]; 653 pj = aj + diag[r[row]] + 1; 654 655 multiplier = *pc / *pv++; 656 *pc = multiplier; 657 nz = ai[r[row]+1] - diag[r[row]] - 1; 658 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 659 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 660 } 661 row = *ajtmp++; 662 } 663 /* finished row so overwrite it onto a->a */ 664 pv = a->a + ai[r[i]] ; 665 pj = aj + ai[r[i]] ; 666 nz = ai[r[i]+1] - ai[r[i]]; 667 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 668 669 rs = 0.0; 670 for (j=0; j<nz; j++) { 671 pv[j] = rtmp[pj[j]]; 672 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 673 } 674 675 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 676 sctx.rs = rs; 677 sctx.pv = pv[nbdiag]; 678 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 679 if (newshift == 1) break; 680 } 681 682 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 683 /* 684 * if no shift in this attempt & shifting & started shifting & can refine, 685 * then try lower shift 686 */ 687 sctx.shift_hi = info->shift_fraction; 688 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 689 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 690 sctx.lushift = PETSC_TRUE; 691 sctx.nshift++; 692 } 693 } while (sctx.lushift); 694 695 /* invert diagonal entries for simplier triangular solves */ 696 for (i=0; i<n; i++) { 697 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 698 } 699 700 ierr = PetscFree(rtmp);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 702 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 703 A->factor = FACTOR_LU; 704 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 705 A->assembled = PETSC_TRUE; 706 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 707 if (sctx.nshift){ 708 if (info->shiftnz) { 709 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 710 } else if (info->shiftpd) { 711 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 712 } 713 } 714 PetscFunctionReturn(0); 715 } 716 717 #undef __FUNCT__ 718 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 719 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 720 { 721 PetscFunctionBegin; 722 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 723 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 724 PetscFunctionReturn(0); 725 } 726 727 728 /* ----------------------------------------------------------- */ 729 #undef __FUNCT__ 730 #define __FUNCT__ "MatLUFactor_SeqAIJ" 731 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 732 { 733 PetscErrorCode ierr; 734 Mat C; 735 736 PetscFunctionBegin; 737 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 738 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 739 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 740 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 741 PetscFunctionReturn(0); 742 } 743 /* ----------------------------------------------------------- */ 744 #undef __FUNCT__ 745 #define __FUNCT__ "MatSolve_SeqAIJ" 746 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 747 { 748 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 749 IS iscol = a->col,isrow = a->row; 750 PetscErrorCode ierr; 751 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 752 PetscInt nz,*rout,*cout; 753 PetscScalar *x,*tmp,*tmps,*aa = a->a,sum,*v; 754 const PetscScalar *b; 755 756 PetscFunctionBegin; 757 if (!n) PetscFunctionReturn(0); 758 759 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 760 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 761 tmp = a->solve_work; 762 763 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 764 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 765 766 /* forward solve the lower triangular */ 767 tmp[0] = b[*r++]; 768 tmps = tmp; 769 for (i=1; i<n; i++) { 770 v = aa + ai[i] ; 771 vi = aj + ai[i] ; 772 nz = a->diag[i] - ai[i]; 773 sum = b[*r++]; 774 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 775 tmp[i] = sum; 776 } 777 778 /* backward solve the upper triangular */ 779 for (i=n-1; i>=0; i--){ 780 v = aa + a->diag[i] + 1; 781 vi = aj + a->diag[i] + 1; 782 nz = ai[i+1] - a->diag[i] - 1; 783 sum = tmp[i]; 784 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 785 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 786 } 787 788 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 789 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 790 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 791 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 792 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 796 #undef __FUNCT__ 797 #define __FUNCT__ "MatMatSolve_SeqAIJ" 798 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 799 { 800 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 801 IS iscol = a->col,isrow = a->row; 802 PetscErrorCode ierr; 803 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 804 PetscInt nz,*rout,*cout,neq; 805 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 806 807 PetscFunctionBegin; 808 if (!n) PetscFunctionReturn(0); 809 810 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 811 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 812 813 tmp = a->solve_work; 814 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 815 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 816 817 for (neq=0; neq<n; neq++){ 818 /* forward solve the lower triangular */ 819 tmp[0] = b[r[0]]; 820 tmps = tmp; 821 for (i=1; i<n; i++) { 822 v = aa + ai[i] ; 823 vi = aj + ai[i] ; 824 nz = a->diag[i] - ai[i]; 825 sum = b[r[i]]; 826 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 827 tmp[i] = sum; 828 } 829 /* backward solve the upper triangular */ 830 for (i=n-1; i>=0; i--){ 831 v = aa + a->diag[i] + 1; 832 vi = aj + a->diag[i] + 1; 833 nz = ai[i+1] - a->diag[i] - 1; 834 sum = tmp[i]; 835 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 836 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 837 } 838 839 b += n; 840 x += n; 841 } 842 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 843 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 844 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 845 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 846 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 847 PetscFunctionReturn(0); 848 } 849 850 #undef __FUNCT__ 851 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 852 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 853 { 854 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 855 IS iscol = a->col,isrow = a->row; 856 PetscErrorCode ierr; 857 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 858 PetscInt nz,*rout,*cout,row; 859 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 860 861 PetscFunctionBegin; 862 if (!n) PetscFunctionReturn(0); 863 864 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 865 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 866 tmp = a->solve_work; 867 868 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 869 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 870 871 /* forward solve the lower triangular */ 872 tmp[0] = b[*r++]; 873 tmps = tmp; 874 for (row=1; row<n; row++) { 875 i = rout[row]; /* permuted row */ 876 v = aa + ai[i] ; 877 vi = aj + ai[i] ; 878 nz = a->diag[i] - ai[i]; 879 sum = b[*r++]; 880 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 881 tmp[row] = sum; 882 } 883 884 /* backward solve the upper triangular */ 885 for (row=n-1; row>=0; row--){ 886 i = rout[row]; /* permuted row */ 887 v = aa + a->diag[i] + 1; 888 vi = aj + a->diag[i] + 1; 889 nz = ai[i+1] - a->diag[i] - 1; 890 sum = tmp[row]; 891 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 892 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 893 } 894 895 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 896 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 897 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 898 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 899 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 900 PetscFunctionReturn(0); 901 } 902 903 /* ----------------------------------------------------------- */ 904 #undef __FUNCT__ 905 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 906 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 907 { 908 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 909 PetscErrorCode ierr; 910 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 911 PetscScalar *x,*b,*aa = a->a; 912 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 913 PetscInt adiag_i,i,*vi,nz,ai_i; 914 PetscScalar *v,sum; 915 #endif 916 917 PetscFunctionBegin; 918 if (!n) PetscFunctionReturn(0); 919 920 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 921 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 922 923 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 924 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 925 #else 926 /* forward solve the lower triangular */ 927 x[0] = b[0]; 928 for (i=1; i<n; i++) { 929 ai_i = ai[i]; 930 v = aa + ai_i; 931 vi = aj + ai_i; 932 nz = adiag[i] - ai_i; 933 sum = b[i]; 934 while (nz--) sum -= *v++ * x[*vi++]; 935 x[i] = sum; 936 } 937 938 /* backward solve the upper triangular */ 939 for (i=n-1; i>=0; i--){ 940 adiag_i = adiag[i]; 941 v = aa + adiag_i + 1; 942 vi = aj + adiag_i + 1; 943 nz = ai[i+1] - adiag_i - 1; 944 sum = x[i]; 945 while (nz--) sum -= *v++ * x[*vi++]; 946 x[i] = sum*aa[adiag_i]; 947 } 948 #endif 949 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 950 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 951 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 952 PetscFunctionReturn(0); 953 } 954 955 #undef __FUNCT__ 956 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 957 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 958 { 959 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 960 IS iscol = a->col,isrow = a->row; 961 PetscErrorCode ierr; 962 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 963 PetscInt nz,*rout,*cout; 964 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 965 966 PetscFunctionBegin; 967 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 968 969 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 970 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 971 tmp = a->solve_work; 972 973 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 974 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 975 976 /* forward solve the lower triangular */ 977 tmp[0] = b[*r++]; 978 for (i=1; i<n; i++) { 979 v = aa + ai[i] ; 980 vi = aj + ai[i] ; 981 nz = a->diag[i] - ai[i]; 982 sum = b[*r++]; 983 while (nz--) sum -= *v++ * tmp[*vi++ ]; 984 tmp[i] = sum; 985 } 986 987 /* backward solve the upper triangular */ 988 for (i=n-1; i>=0; i--){ 989 v = aa + a->diag[i] + 1; 990 vi = aj + a->diag[i] + 1; 991 nz = ai[i+1] - a->diag[i] - 1; 992 sum = tmp[i]; 993 while (nz--) sum -= *v++ * tmp[*vi++ ]; 994 tmp[i] = sum*aa[a->diag[i]]; 995 x[*c--] += tmp[i]; 996 } 997 998 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 999 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1001 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1002 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1003 1004 PetscFunctionReturn(0); 1005 } 1006 /* -------------------------------------------------------------------*/ 1007 #undef __FUNCT__ 1008 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1009 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1010 { 1011 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1012 IS iscol = a->col,isrow = a->row; 1013 PetscErrorCode ierr; 1014 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1015 PetscInt nz,*rout,*cout,*diag = a->diag; 1016 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1017 1018 PetscFunctionBegin; 1019 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1020 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1021 tmp = a->solve_work; 1022 1023 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1024 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1025 1026 /* copy the b into temp work space according to permutation */ 1027 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1028 1029 /* forward solve the U^T */ 1030 for (i=0; i<n; i++) { 1031 v = aa + diag[i] ; 1032 vi = aj + diag[i] + 1; 1033 nz = ai[i+1] - diag[i] - 1; 1034 s1 = tmp[i]; 1035 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1036 while (nz--) { 1037 tmp[*vi++ ] -= (*v++)*s1; 1038 } 1039 tmp[i] = s1; 1040 } 1041 1042 /* backward solve the L^T */ 1043 for (i=n-1; i>=0; i--){ 1044 v = aa + diag[i] - 1 ; 1045 vi = aj + diag[i] - 1 ; 1046 nz = diag[i] - ai[i]; 1047 s1 = tmp[i]; 1048 while (nz--) { 1049 tmp[*vi-- ] -= (*v--)*s1; 1050 } 1051 } 1052 1053 /* copy tmp into x according to permutation */ 1054 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1055 1056 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1057 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1059 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1060 1061 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 #undef __FUNCT__ 1066 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1067 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1068 { 1069 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1070 IS iscol = a->col,isrow = a->row; 1071 PetscErrorCode ierr; 1072 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1073 PetscInt nz,*rout,*cout,*diag = a->diag; 1074 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1075 1076 PetscFunctionBegin; 1077 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1078 1079 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1080 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1081 tmp = a->solve_work; 1082 1083 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1084 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1085 1086 /* copy the b into temp work space according to permutation */ 1087 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1088 1089 /* forward solve the U^T */ 1090 for (i=0; i<n; i++) { 1091 v = aa + diag[i] ; 1092 vi = aj + diag[i] + 1; 1093 nz = ai[i+1] - diag[i] - 1; 1094 tmp[i] *= *v++; 1095 while (nz--) { 1096 tmp[*vi++ ] -= (*v++)*tmp[i]; 1097 } 1098 } 1099 1100 /* backward solve the L^T */ 1101 for (i=n-1; i>=0; i--){ 1102 v = aa + diag[i] - 1 ; 1103 vi = aj + diag[i] - 1 ; 1104 nz = diag[i] - ai[i]; 1105 while (nz--) { 1106 tmp[*vi-- ] -= (*v--)*tmp[i]; 1107 } 1108 } 1109 1110 /* copy tmp into x according to permutation */ 1111 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1112 1113 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1114 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1116 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1117 1118 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 /* ----------------------------------------------------------------*/ 1122 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1126 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1127 { 1128 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1129 IS isicol; 1130 PetscErrorCode ierr; 1131 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1132 PetscInt *bi,*cols,nnz,*cols_lvl; 1133 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1134 PetscInt i,levels,diagonal_fill; 1135 PetscTruth col_identity,row_identity; 1136 PetscReal f; 1137 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1138 PetscBT lnkbt; 1139 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1140 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1141 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1142 PetscTruth missing; 1143 1144 PetscFunctionBegin; 1145 f = info->fill; 1146 levels = (PetscInt)info->levels; 1147 diagonal_fill = (PetscInt)info->diagonal_fill; 1148 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1149 1150 /* special case that simply copies fill pattern */ 1151 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1152 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1153 if (!levels && row_identity && col_identity) { 1154 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1155 (*fact)->factor = FACTOR_LU; 1156 (*fact)->info.factor_mallocs = 0; 1157 (*fact)->info.fill_ratio_given = info->fill; 1158 (*fact)->info.fill_ratio_needed = 1.0; 1159 b = (Mat_SeqAIJ*)(*fact)->data; 1160 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1161 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1162 b->row = isrow; 1163 b->col = iscol; 1164 b->icol = isicol; 1165 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1166 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1167 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1168 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1169 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1170 PetscFunctionReturn(0); 1171 } 1172 1173 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1174 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1175 1176 /* get new row pointers */ 1177 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1178 bi[0] = 0; 1179 /* bdiag is location of diagonal in factor */ 1180 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1181 bdiag[0] = 0; 1182 1183 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1184 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1185 1186 /* create a linked list for storing column indices of the active row */ 1187 nlnk = n + 1; 1188 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1189 1190 /* initial FreeSpace size is f*(ai[n]+1) */ 1191 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1192 current_space = free_space; 1193 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1194 current_space_lvl = free_space_lvl; 1195 1196 for (i=0; i<n; i++) { 1197 nzi = 0; 1198 /* copy current row into linked list */ 1199 nnz = ai[r[i]+1] - ai[r[i]]; 1200 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1201 cols = aj + ai[r[i]]; 1202 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1203 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1204 nzi += nlnk; 1205 1206 /* make sure diagonal entry is included */ 1207 if (diagonal_fill && lnk[i] == -1) { 1208 fm = n; 1209 while (lnk[fm] < i) fm = lnk[fm]; 1210 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1211 lnk[fm] = i; 1212 lnk_lvl[i] = 0; 1213 nzi++; dcount++; 1214 } 1215 1216 /* add pivot rows into the active row */ 1217 nzbd = 0; 1218 prow = lnk[n]; 1219 while (prow < i) { 1220 nnz = bdiag[prow]; 1221 cols = bj_ptr[prow] + nnz + 1; 1222 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1223 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1224 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1225 nzi += nlnk; 1226 prow = lnk[prow]; 1227 nzbd++; 1228 } 1229 bdiag[i] = nzbd; 1230 bi[i+1] = bi[i] + nzi; 1231 1232 /* if free space is not available, make more free space */ 1233 if (current_space->local_remaining<nzi) { 1234 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1235 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1236 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1237 reallocs++; 1238 } 1239 1240 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1241 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1242 bj_ptr[i] = current_space->array; 1243 bjlvl_ptr[i] = current_space_lvl->array; 1244 1245 /* make sure the active row i has diagonal entry */ 1246 if (*(bj_ptr[i]+bdiag[i]) != i) { 1247 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1248 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1249 } 1250 1251 current_space->array += nzi; 1252 current_space->local_used += nzi; 1253 current_space->local_remaining -= nzi; 1254 current_space_lvl->array += nzi; 1255 current_space_lvl->local_used += nzi; 1256 current_space_lvl->local_remaining -= nzi; 1257 } 1258 1259 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1260 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1261 1262 /* destroy list of free space and other temporary arrays */ 1263 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1264 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1265 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1266 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1267 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1268 1269 #if defined(PETSC_USE_INFO) 1270 { 1271 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1272 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1273 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1274 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1275 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1276 if (diagonal_fill) { 1277 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1278 } 1279 } 1280 #endif 1281 1282 /* put together the new matrix */ 1283 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1284 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1285 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1286 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1287 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1288 b = (Mat_SeqAIJ*)(*fact)->data; 1289 b->free_a = PETSC_TRUE; 1290 b->free_ij = PETSC_TRUE; 1291 b->singlemalloc = PETSC_FALSE; 1292 len = (bi[n] )*sizeof(PetscScalar); 1293 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1294 b->j = bj; 1295 b->i = bi; 1296 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1297 b->diag = bdiag; 1298 b->ilen = 0; 1299 b->imax = 0; 1300 b->row = isrow; 1301 b->col = iscol; 1302 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1303 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1304 b->icol = isicol; 1305 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1306 /* In b structure: Free imax, ilen, old a, old j. 1307 Allocate bdiag, solve_work, new a, new j */ 1308 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1309 b->maxnz = b->nz = bi[n] ; 1310 (*fact)->factor = FACTOR_LU; 1311 (*fact)->info.factor_mallocs = reallocs; 1312 (*fact)->info.fill_ratio_given = f; 1313 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1314 1315 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1316 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1317 1318 PetscFunctionReturn(0); 1319 } 1320 1321 #include "src/mat/impls/sbaij/seq/sbaij.h" 1322 #undef __FUNCT__ 1323 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1324 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1325 { 1326 Mat C = *B; 1327 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1328 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1329 IS ip=b->row,iip = b->icol; 1330 PetscErrorCode ierr; 1331 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1332 PetscInt *ai=a->i,*aj=a->j; 1333 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1334 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1335 PetscReal zeropivot,rs,shiftnz; 1336 PetscReal shiftpd; 1337 ChShift_Ctx sctx; 1338 PetscInt newshift; 1339 1340 PetscFunctionBegin; 1341 1342 shiftnz = info->shiftnz; 1343 shiftpd = info->shiftpd; 1344 zeropivot = info->zeropivot; 1345 1346 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1347 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1348 1349 /* initialization */ 1350 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1351 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1352 jl = il + mbs; 1353 rtmp = (MatScalar*)(jl + mbs); 1354 1355 sctx.shift_amount = 0; 1356 sctx.nshift = 0; 1357 do { 1358 sctx.chshift = PETSC_FALSE; 1359 for (i=0; i<mbs; i++) { 1360 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1361 } 1362 1363 for (k = 0; k<mbs; k++){ 1364 bval = ba + bi[k]; 1365 /* initialize k-th row by the perm[k]-th row of A */ 1366 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1367 for (j = jmin; j < jmax; j++){ 1368 col = riip[aj[j]]; 1369 if (col >= k){ /* only take upper triangular entry */ 1370 rtmp[col] = aa[j]; 1371 *bval++ = 0.0; /* for in-place factorization */ 1372 } 1373 } 1374 /* shift the diagonal of the matrix */ 1375 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1376 1377 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1378 dk = rtmp[k]; 1379 i = jl[k]; /* first row to be added to k_th row */ 1380 1381 while (i < k){ 1382 nexti = jl[i]; /* next row to be added to k_th row */ 1383 1384 /* compute multiplier, update diag(k) and U(i,k) */ 1385 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1386 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1387 dk += uikdi*ba[ili]; 1388 ba[ili] = uikdi; /* -U(i,k) */ 1389 1390 /* add multiple of row i to k-th row */ 1391 jmin = ili + 1; jmax = bi[i+1]; 1392 if (jmin < jmax){ 1393 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1394 /* update il and jl for row i */ 1395 il[i] = jmin; 1396 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1397 } 1398 i = nexti; 1399 } 1400 1401 /* shift the diagonals when zero pivot is detected */ 1402 /* compute rs=sum of abs(off-diagonal) */ 1403 rs = 0.0; 1404 jmin = bi[k]+1; 1405 nz = bi[k+1] - jmin; 1406 bcol = bj + jmin; 1407 while (nz--){ 1408 rs += PetscAbsScalar(rtmp[*bcol]); 1409 bcol++; 1410 } 1411 1412 sctx.rs = rs; 1413 sctx.pv = dk; 1414 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1415 1416 if (newshift == 1) { 1417 if (!sctx.shift_amount) { 1418 sctx.shift_amount = 1e-5; 1419 } 1420 break; 1421 } 1422 1423 /* copy data into U(k,:) */ 1424 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1425 jmin = bi[k]+1; jmax = bi[k+1]; 1426 if (jmin < jmax) { 1427 for (j=jmin; j<jmax; j++){ 1428 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1429 } 1430 /* add the k-th row into il and jl */ 1431 il[k] = jmin; 1432 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1433 } 1434 } 1435 } while (sctx.chshift); 1436 ierr = PetscFree(il);CHKERRQ(ierr); 1437 1438 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1439 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1440 C->factor = FACTOR_CHOLESKY; 1441 C->assembled = PETSC_TRUE; 1442 C->preallocated = PETSC_TRUE; 1443 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1444 if (sctx.nshift){ 1445 if (shiftnz) { 1446 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1447 } else if (shiftpd) { 1448 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1449 } 1450 } 1451 PetscFunctionReturn(0); 1452 } 1453 1454 #undef __FUNCT__ 1455 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1456 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1457 { 1458 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1459 Mat_SeqSBAIJ *b; 1460 Mat B; 1461 PetscErrorCode ierr; 1462 PetscTruth perm_identity,missing; 1463 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1464 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1465 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1466 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1467 PetscReal fill=info->fill,levels=info->levels; 1468 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1469 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1470 PetscBT lnkbt; 1471 IS iperm; 1472 1473 PetscFunctionBegin; 1474 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1475 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1476 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1477 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1478 1479 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1480 ui[0] = 0; 1481 1482 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1483 if (!levels && perm_identity) { 1484 1485 for (i=0; i<am; i++) { 1486 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1487 } 1488 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1489 cols = uj; 1490 for (i=0; i<am; i++) { 1491 aj = a->j + a->diag[i]; 1492 ncols = ui[i+1] - ui[i]; 1493 for (j=0; j<ncols; j++) *cols++ = *aj++; 1494 } 1495 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1496 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1497 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1498 1499 /* initialization */ 1500 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1501 1502 /* jl: linked list for storing indices of the pivot rows 1503 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1504 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1505 il = jl + am; 1506 uj_ptr = (PetscInt**)(il + am); 1507 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1508 for (i=0; i<am; i++){ 1509 jl[i] = am; il[i] = 0; 1510 } 1511 1512 /* create and initialize a linked list for storing column indices of the active row k */ 1513 nlnk = am + 1; 1514 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1515 1516 /* initial FreeSpace size is fill*(ai[am]+1) */ 1517 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1518 current_space = free_space; 1519 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1520 current_space_lvl = free_space_lvl; 1521 1522 for (k=0; k<am; k++){ /* for each active row k */ 1523 /* initialize lnk by the column indices of row rip[k] of A */ 1524 nzk = 0; 1525 ncols = ai[rip[k]+1] - ai[rip[k]]; 1526 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1527 ncols_upper = 0; 1528 for (j=0; j<ncols; j++){ 1529 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1530 if (riip[i] >= k){ /* only take upper triangular entry */ 1531 ajtmp[ncols_upper] = i; 1532 ncols_upper++; 1533 } 1534 } 1535 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1536 nzk += nlnk; 1537 1538 /* update lnk by computing fill-in for each pivot row to be merged in */ 1539 prow = jl[k]; /* 1st pivot row */ 1540 1541 while (prow < k){ 1542 nextprow = jl[prow]; 1543 1544 /* merge prow into k-th row */ 1545 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1546 jmax = ui[prow+1]; 1547 ncols = jmax-jmin; 1548 i = jmin - ui[prow]; 1549 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1550 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1551 j = *(uj - 1); 1552 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1553 nzk += nlnk; 1554 1555 /* update il and jl for prow */ 1556 if (jmin < jmax){ 1557 il[prow] = jmin; 1558 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1559 } 1560 prow = nextprow; 1561 } 1562 1563 /* if free space is not available, make more free space */ 1564 if (current_space->local_remaining<nzk) { 1565 i = am - k + 1; /* num of unfactored rows */ 1566 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1567 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1568 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1569 reallocs++; 1570 } 1571 1572 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1573 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1574 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1575 1576 /* add the k-th row into il and jl */ 1577 if (nzk > 1){ 1578 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1579 jl[k] = jl[i]; jl[i] = k; 1580 il[k] = ui[k] + 1; 1581 } 1582 uj_ptr[k] = current_space->array; 1583 uj_lvl_ptr[k] = current_space_lvl->array; 1584 1585 current_space->array += nzk; 1586 current_space->local_used += nzk; 1587 current_space->local_remaining -= nzk; 1588 1589 current_space_lvl->array += nzk; 1590 current_space_lvl->local_used += nzk; 1591 current_space_lvl->local_remaining -= nzk; 1592 1593 ui[k+1] = ui[k] + nzk; 1594 } 1595 1596 #if defined(PETSC_USE_INFO) 1597 if (ai[am] != 0) { 1598 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1599 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1600 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1601 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1602 } else { 1603 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1604 } 1605 #endif 1606 1607 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1608 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1609 ierr = PetscFree(jl);CHKERRQ(ierr); 1610 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1611 1612 /* destroy list of free space and other temporary array(s) */ 1613 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1614 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1615 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1616 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1617 1618 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1619 1620 /* put together the new matrix in MATSEQSBAIJ format */ 1621 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1622 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1623 B = *fact; 1624 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1625 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1626 1627 b = (Mat_SeqSBAIJ*)B->data; 1628 b->singlemalloc = PETSC_FALSE; 1629 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1630 b->j = uj; 1631 b->i = ui; 1632 b->diag = 0; 1633 b->ilen = 0; 1634 b->imax = 0; 1635 b->row = perm; 1636 b->col = perm; 1637 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1638 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1639 b->icol = iperm; 1640 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1641 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1642 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1643 b->maxnz = b->nz = ui[am]; 1644 b->free_a = PETSC_TRUE; 1645 b->free_ij = PETSC_TRUE; 1646 1647 B->factor = FACTOR_CHOLESKY; 1648 B->info.factor_mallocs = reallocs; 1649 B->info.fill_ratio_given = fill; 1650 if (ai[am] != 0) { 1651 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1652 } else { 1653 B->info.fill_ratio_needed = 0.0; 1654 } 1655 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1656 if (perm_identity){ 1657 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1658 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 #undef __FUNCT__ 1664 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1665 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1666 { 1667 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1668 Mat_SeqSBAIJ *b; 1669 Mat B; 1670 PetscErrorCode ierr; 1671 PetscTruth perm_identity; 1672 PetscReal fill = info->fill; 1673 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1674 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1675 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1676 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1677 PetscBT lnkbt; 1678 IS iperm; 1679 1680 PetscFunctionBegin; 1681 /* check whether perm is the identity mapping */ 1682 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1683 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1684 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1685 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1686 1687 /* initialization */ 1688 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1689 ui[0] = 0; 1690 1691 /* jl: linked list for storing indices of the pivot rows 1692 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1693 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1694 il = jl + am; 1695 cols = il + am; 1696 ui_ptr = (PetscInt**)(cols + am); 1697 for (i=0; i<am; i++){ 1698 jl[i] = am; il[i] = 0; 1699 } 1700 1701 /* create and initialize a linked list for storing column indices of the active row k */ 1702 nlnk = am + 1; 1703 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1704 1705 /* initial FreeSpace size is fill*(ai[am]+1) */ 1706 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1707 current_space = free_space; 1708 1709 for (k=0; k<am; k++){ /* for each active row k */ 1710 /* initialize lnk by the column indices of row rip[k] of A */ 1711 nzk = 0; 1712 ncols = ai[rip[k]+1] - ai[rip[k]]; 1713 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1714 ncols_upper = 0; 1715 for (j=0; j<ncols; j++){ 1716 i = riip[*(aj + ai[rip[k]] + j)]; 1717 if (i >= k){ /* only take upper triangular entry */ 1718 cols[ncols_upper] = i; 1719 ncols_upper++; 1720 } 1721 } 1722 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1723 nzk += nlnk; 1724 1725 /* update lnk by computing fill-in for each pivot row to be merged in */ 1726 prow = jl[k]; /* 1st pivot row */ 1727 1728 while (prow < k){ 1729 nextprow = jl[prow]; 1730 /* merge prow into k-th row */ 1731 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1732 jmax = ui[prow+1]; 1733 ncols = jmax-jmin; 1734 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1735 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1736 nzk += nlnk; 1737 1738 /* update il and jl for prow */ 1739 if (jmin < jmax){ 1740 il[prow] = jmin; 1741 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1742 } 1743 prow = nextprow; 1744 } 1745 1746 /* if free space is not available, make more free space */ 1747 if (current_space->local_remaining<nzk) { 1748 i = am - k + 1; /* num of unfactored rows */ 1749 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1750 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1751 reallocs++; 1752 } 1753 1754 /* copy data into free space, then initialize lnk */ 1755 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1756 1757 /* add the k-th row into il and jl */ 1758 if (nzk-1 > 0){ 1759 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1760 jl[k] = jl[i]; jl[i] = k; 1761 il[k] = ui[k] + 1; 1762 } 1763 ui_ptr[k] = current_space->array; 1764 current_space->array += nzk; 1765 current_space->local_used += nzk; 1766 current_space->local_remaining -= nzk; 1767 1768 ui[k+1] = ui[k] + nzk; 1769 } 1770 1771 #if defined(PETSC_USE_INFO) 1772 if (ai[am] != 0) { 1773 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1774 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1775 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1776 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1777 } else { 1778 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1779 } 1780 #endif 1781 1782 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1783 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1784 ierr = PetscFree(jl);CHKERRQ(ierr); 1785 1786 /* destroy list of free space and other temporary array(s) */ 1787 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1788 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1789 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1790 1791 /* put together the new matrix in MATSEQSBAIJ format */ 1792 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1793 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1794 B = *fact; 1795 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1796 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1797 1798 b = (Mat_SeqSBAIJ*)B->data; 1799 b->singlemalloc = PETSC_FALSE; 1800 b->free_a = PETSC_TRUE; 1801 b->free_ij = PETSC_TRUE; 1802 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1803 b->j = uj; 1804 b->i = ui; 1805 b->diag = 0; 1806 b->ilen = 0; 1807 b->imax = 0; 1808 b->row = perm; 1809 b->col = perm; 1810 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1811 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1812 b->icol = iperm; 1813 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1814 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1815 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1816 b->maxnz = b->nz = ui[am]; 1817 1818 B->factor = FACTOR_CHOLESKY; 1819 B->info.factor_mallocs = reallocs; 1820 B->info.fill_ratio_given = fill; 1821 if (ai[am] != 0) { 1822 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1823 } else { 1824 B->info.fill_ratio_needed = 0.0; 1825 } 1826 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1827 if (perm_identity){ 1828 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1829 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1830 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1831 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1832 } else { 1833 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1834 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1835 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1836 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840