1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 260 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 261 { 262 PetscInt n = A->rmap.n; 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 267 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 268 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 269 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 270 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 271 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 272 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 273 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 274 PetscFunctionReturn(0); 275 } 276 277 #undef __FUNCT__ 278 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 279 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 280 { 281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 282 IS isicol; 283 PetscErrorCode ierr; 284 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 285 PetscInt *bi,*bj,*ajtmp; 286 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 287 PetscReal f; 288 PetscInt nlnk,*lnk,k,**bi_ptr; 289 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 290 PetscBT lnkbt; 291 292 PetscFunctionBegin; 293 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 294 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 295 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 296 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 297 298 /* get new row pointers */ 299 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 300 bi[0] = 0; 301 302 /* bdiag is location of diagonal in factor */ 303 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 304 bdiag[0] = 0; 305 306 /* linked list for storing column indices of the active row */ 307 nlnk = n + 1; 308 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 309 310 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 311 312 /* initial FreeSpace size is f*(ai[n]+1) */ 313 f = info->fill; 314 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 315 current_space = free_space; 316 317 for (i=0; i<n; i++) { 318 /* copy previous fill into linked list */ 319 nzi = 0; 320 nnz = ai[r[i]+1] - ai[r[i]]; 321 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 322 ajtmp = aj + ai[r[i]]; 323 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 324 nzi += nlnk; 325 326 /* add pivot rows into linked list */ 327 row = lnk[n]; 328 while (row < i) { 329 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 330 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 331 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 332 nzi += nlnk; 333 row = lnk[row]; 334 } 335 bi[i+1] = bi[i] + nzi; 336 im[i] = nzi; 337 338 /* mark bdiag */ 339 nzbd = 0; 340 nnz = nzi; 341 k = lnk[n]; 342 while (nnz-- && k < i){ 343 nzbd++; 344 k = lnk[k]; 345 } 346 bdiag[i] = bi[i] + nzbd; 347 348 /* if free space is not available, make more free space */ 349 if (current_space->local_remaining<nzi) { 350 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 351 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 352 reallocs++; 353 } 354 355 /* copy data into free space, then initialize lnk */ 356 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 357 bi_ptr[i] = current_space->array; 358 current_space->array += nzi; 359 current_space->local_used += nzi; 360 current_space->local_remaining -= nzi; 361 } 362 #if defined(PETSC_USE_INFO) 363 if (ai[n] != 0) { 364 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 365 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 366 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 367 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 368 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 369 } else { 370 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 371 } 372 #endif 373 374 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 375 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 376 377 /* destroy list of free space and other temporary array(s) */ 378 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 379 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 380 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 381 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 382 383 /* put together the new matrix */ 384 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 385 b = (Mat_SeqAIJ*)(*B)->data; 386 b->free_a = PETSC_TRUE; 387 b->free_ij = PETSC_TRUE; 388 b->singlemalloc = PETSC_FALSE; 389 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = 0; 394 b->imax = 0; 395 b->row = isrow; 396 b->col = iscol; 397 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 398 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 399 b->icol = isicol; 400 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 404 b->maxnz = b->nz = bi[n] ; 405 406 (*B)->factor = FACTOR_LU; 407 (*B)->info.factor_mallocs = reallocs; 408 (*B)->info.fill_ratio_given = f; 409 410 if (ai[n] != 0) { 411 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 412 } else { 413 (*B)->info.fill_ratio_needed = 0.0; 414 } 415 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 416 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Trouble in factorization, should we dump the original matrix? 422 */ 423 #undef __FUNCT__ 424 #define __FUNCT__ "MatFactorDumpMatrix" 425 PetscErrorCode MatFactorDumpMatrix(Mat A) 426 { 427 PetscErrorCode ierr; 428 PetscTruth flg; 429 430 PetscFunctionBegin; 431 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 432 if (flg) { 433 PetscViewer viewer; 434 char filename[PETSC_MAX_PATH_LEN]; 435 436 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 437 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 438 ierr = MatView(A,viewer);CHKERRQ(ierr); 439 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 440 } 441 PetscFunctionReturn(0); 442 } 443 444 /* ----------------------------------------------------------- */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 447 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 448 { 449 Mat C=*B; 450 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 451 IS isrow = b->row,isicol = b->icol; 452 PetscErrorCode ierr; 453 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 454 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 455 PetscInt *diag_offset = b->diag,diag,*pj; 456 PetscScalar *rtmp,*pc,multiplier,*rtmps; 457 MatScalar *v,*pv; 458 PetscScalar d; 459 PetscReal rs; 460 LUShift_Ctx sctx; 461 PetscInt newshift,*ddiag; 462 463 PetscFunctionBegin; 464 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 465 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 466 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 467 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 468 rtmps = rtmp; ics = ic; 469 470 sctx.shift_top = 0; 471 sctx.nshift_max = 0; 472 sctx.shift_lo = 0; 473 sctx.shift_hi = 0; 474 475 /* if both shift schemes are chosen by user, only use info->shiftpd */ 476 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 477 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 478 PetscInt *aai = a->i; 479 ddiag = a->diag; 480 sctx.shift_top = 0; 481 for (i=0; i<n; i++) { 482 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 483 d = (a->a)[ddiag[i]]; 484 rs = -PetscAbsScalar(d) - PetscRealPart(d); 485 v = a->a+aai[i]; 486 nz = aai[i+1] - aai[i]; 487 for (j=0; j<nz; j++) 488 rs += PetscAbsScalar(v[j]); 489 if (rs>sctx.shift_top) sctx.shift_top = rs; 490 } 491 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 492 sctx.shift_top *= 1.1; 493 sctx.nshift_max = 5; 494 sctx.shift_lo = 0.; 495 sctx.shift_hi = 1.; 496 } 497 498 sctx.shift_amount = 0; 499 sctx.nshift = 0; 500 do { 501 sctx.lushift = PETSC_FALSE; 502 for (i=0; i<n; i++){ 503 nz = bi[i+1] - bi[i]; 504 bjtmp = bj + bi[i]; 505 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 506 507 /* load in initial (unfactored row) */ 508 nz = a->i[r[i]+1] - a->i[r[i]]; 509 ajtmp = a->j + a->i[r[i]]; 510 v = a->a + a->i[r[i]]; 511 for (j=0; j<nz; j++) { 512 rtmp[ics[ajtmp[j]]] = v[j]; 513 } 514 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 515 516 row = *bjtmp++; 517 while (row < i) { 518 pc = rtmp + row; 519 if (*pc != 0.0) { 520 pv = b->a + diag_offset[row]; 521 pj = b->j + diag_offset[row] + 1; 522 multiplier = *pc / *pv++; 523 *pc = multiplier; 524 nz = bi[row+1] - diag_offset[row] - 1; 525 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 526 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 527 } 528 row = *bjtmp++; 529 } 530 /* finished row so stick it into b->a */ 531 pv = b->a + bi[i] ; 532 pj = b->j + bi[i] ; 533 nz = bi[i+1] - bi[i]; 534 diag = diag_offset[i] - bi[i]; 535 rs = 0.0; 536 for (j=0; j<nz; j++) { 537 pv[j] = rtmps[pj[j]]; 538 if (j != diag) rs += PetscAbsScalar(pv[j]); 539 } 540 541 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 542 sctx.rs = rs; 543 sctx.pv = pv[diag]; 544 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 545 if (newshift == 1) break; 546 } 547 548 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 549 /* 550 * if no shift in this attempt & shifting & started shifting & can refine, 551 * then try lower shift 552 */ 553 sctx.shift_hi = info->shift_fraction; 554 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 555 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 556 sctx.lushift = PETSC_TRUE; 557 sctx.nshift++; 558 } 559 } while (sctx.lushift); 560 561 /* invert diagonal entries for simplier triangular solves */ 562 for (i=0; i<n; i++) { 563 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 564 } 565 566 ierr = PetscFree(rtmp);CHKERRQ(ierr); 567 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 568 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 569 C->factor = FACTOR_LU; 570 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 571 C->assembled = PETSC_TRUE; 572 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 573 if (sctx.nshift){ 574 if (info->shiftnz) { 575 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 576 } else if (info->shiftpd) { 577 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 578 } 579 } 580 PetscFunctionReturn(0); 581 } 582 583 /* 584 This routine implements inplace ILU(0) with row or/and column permutations. 585 Input: 586 A - original matrix 587 Output; 588 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 589 a->j (col index) is permuted by the inverse of colperm, then sorted 590 a->a reordered accordingly with a->j 591 a->diag (ptr to diagonal elements) is updated. 592 */ 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 595 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 596 { 597 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 598 IS isrow = a->row,isicol = a->icol; 599 PetscErrorCode ierr; 600 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 601 PetscInt *ajtmp,nz,row,*ics; 602 PetscInt *diag = a->diag,nbdiag,*pj; 603 PetscScalar *rtmp,*pc,multiplier,d; 604 MatScalar *v,*pv; 605 PetscReal rs; 606 LUShift_Ctx sctx; 607 PetscInt newshift; 608 609 PetscFunctionBegin; 610 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 611 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 612 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 613 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 614 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 615 ics = ic; 616 617 sctx.shift_top = 0; 618 sctx.nshift_max = 0; 619 sctx.shift_lo = 0; 620 sctx.shift_hi = 0; 621 622 /* if both shift schemes are chosen by user, only use info->shiftpd */ 623 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 624 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 625 sctx.shift_top = 0; 626 for (i=0; i<n; i++) { 627 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 628 d = (a->a)[diag[i]]; 629 rs = -PetscAbsScalar(d) - PetscRealPart(d); 630 v = a->a+ai[i]; 631 nz = ai[i+1] - ai[i]; 632 for (j=0; j<nz; j++) 633 rs += PetscAbsScalar(v[j]); 634 if (rs>sctx.shift_top) sctx.shift_top = rs; 635 } 636 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 637 sctx.shift_top *= 1.1; 638 sctx.nshift_max = 5; 639 sctx.shift_lo = 0.; 640 sctx.shift_hi = 1.; 641 } 642 643 sctx.shift_amount = 0; 644 sctx.nshift = 0; 645 do { 646 sctx.lushift = PETSC_FALSE; 647 for (i=0; i<n; i++){ 648 /* load in initial unfactored row */ 649 nz = ai[r[i]+1] - ai[r[i]]; 650 ajtmp = aj + ai[r[i]]; 651 v = a->a + ai[r[i]]; 652 /* sort permuted ajtmp and values v accordingly */ 653 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 654 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 655 656 diag[r[i]] = ai[r[i]]; 657 for (j=0; j<nz; j++) { 658 rtmp[ajtmp[j]] = v[j]; 659 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 660 } 661 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 662 663 row = *ajtmp++; 664 while (row < i) { 665 pc = rtmp + row; 666 if (*pc != 0.0) { 667 pv = a->a + diag[r[row]]; 668 pj = aj + diag[r[row]] + 1; 669 670 multiplier = *pc / *pv++; 671 *pc = multiplier; 672 nz = ai[r[row]+1] - diag[r[row]] - 1; 673 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 674 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 675 } 676 row = *ajtmp++; 677 } 678 /* finished row so overwrite it onto a->a */ 679 pv = a->a + ai[r[i]] ; 680 pj = aj + ai[r[i]] ; 681 nz = ai[r[i]+1] - ai[r[i]]; 682 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 683 684 rs = 0.0; 685 for (j=0; j<nz; j++) { 686 pv[j] = rtmp[pj[j]]; 687 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 688 } 689 690 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 691 sctx.rs = rs; 692 sctx.pv = pv[nbdiag]; 693 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 694 if (newshift == 1) break; 695 } 696 697 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 698 /* 699 * if no shift in this attempt & shifting & started shifting & can refine, 700 * then try lower shift 701 */ 702 sctx.shift_hi = info->shift_fraction; 703 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 704 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 705 sctx.lushift = PETSC_TRUE; 706 sctx.nshift++; 707 } 708 } while (sctx.lushift); 709 710 /* invert diagonal entries for simplier triangular solves */ 711 for (i=0; i<n; i++) { 712 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 713 } 714 715 ierr = PetscFree(rtmp);CHKERRQ(ierr); 716 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 717 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 718 A->factor = FACTOR_LU; 719 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 720 A->assembled = PETSC_TRUE; 721 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 722 if (sctx.nshift){ 723 if (info->shiftnz) { 724 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 725 } else if (info->shiftpd) { 726 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 727 } 728 } 729 PetscFunctionReturn(0); 730 } 731 732 /* ----------------------------------------------------------- */ 733 #undef __FUNCT__ 734 #define __FUNCT__ "MatLUFactor_SeqAIJ" 735 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 736 { 737 PetscErrorCode ierr; 738 Mat C; 739 740 PetscFunctionBegin; 741 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_LU,&C);CHKERRQ(ierr); 742 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 743 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 744 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 745 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 746 PetscFunctionReturn(0); 747 } 748 /* ----------------------------------------------------------- */ 749 #undef __FUNCT__ 750 #define __FUNCT__ "MatSolve_SeqAIJ" 751 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 752 { 753 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 754 IS iscol = a->col,isrow = a->row; 755 PetscErrorCode ierr; 756 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 757 PetscInt nz,*rout,*cout; 758 PetscScalar *x,*tmp,*tmps,sum; 759 const PetscScalar *b; 760 const MatScalar *aa = a->a,*v; 761 762 PetscFunctionBegin; 763 if (!n) PetscFunctionReturn(0); 764 765 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 766 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 767 tmp = a->solve_work; 768 769 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 770 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 771 772 /* forward solve the lower triangular */ 773 tmp[0] = b[*r++]; 774 tmps = tmp; 775 for (i=1; i<n; i++) { 776 v = aa + ai[i] ; 777 vi = aj + ai[i] ; 778 nz = a->diag[i] - ai[i]; 779 sum = b[*r++]; 780 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 781 tmp[i] = sum; 782 } 783 784 /* backward solve the upper triangular */ 785 for (i=n-1; i>=0; i--){ 786 v = aa + a->diag[i] + 1; 787 vi = aj + a->diag[i] + 1; 788 nz = ai[i+1] - a->diag[i] - 1; 789 sum = tmp[i]; 790 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 791 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 792 } 793 794 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 795 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 796 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 797 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 798 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 799 PetscFunctionReturn(0); 800 } 801 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatMatSolve_SeqAIJ" 804 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 805 { 806 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 807 IS iscol = a->col,isrow = a->row; 808 PetscErrorCode ierr; 809 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 810 PetscInt nz,*rout,*cout,neq; 811 PetscScalar *x,*b,*tmp,*tmps,sum; 812 const MatScalar *aa = a->a,*v; 813 814 PetscFunctionBegin; 815 if (!n) PetscFunctionReturn(0); 816 817 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 818 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 819 820 tmp = a->solve_work; 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823 824 for (neq=0; neq<n; neq++){ 825 /* forward solve the lower triangular */ 826 tmp[0] = b[r[0]]; 827 tmps = tmp; 828 for (i=1; i<n; i++) { 829 v = aa + ai[i] ; 830 vi = aj + ai[i] ; 831 nz = a->diag[i] - ai[i]; 832 sum = b[r[i]]; 833 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 834 tmp[i] = sum; 835 } 836 /* backward solve the upper triangular */ 837 for (i=n-1; i>=0; i--){ 838 v = aa + a->diag[i] + 1; 839 vi = aj + a->diag[i] + 1; 840 nz = ai[i+1] - a->diag[i] - 1; 841 sum = tmp[i]; 842 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 843 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 844 } 845 846 b += n; 847 x += n; 848 } 849 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 850 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 851 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 852 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 853 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 859 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 860 { 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 862 IS iscol = a->col,isrow = a->row; 863 PetscErrorCode ierr; 864 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 865 PetscInt nz,*rout,*cout,row; 866 PetscScalar *x,*b,*tmp,*tmps,sum; 867 const MatScalar *aa = a->a,*v; 868 869 PetscFunctionBegin; 870 if (!n) PetscFunctionReturn(0); 871 872 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 873 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 874 tmp = a->solve_work; 875 876 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 877 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 878 879 /* forward solve the lower triangular */ 880 tmp[0] = b[*r++]; 881 tmps = tmp; 882 for (row=1; row<n; row++) { 883 i = rout[row]; /* permuted row */ 884 v = aa + ai[i] ; 885 vi = aj + ai[i] ; 886 nz = a->diag[i] - ai[i]; 887 sum = b[*r++]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 tmp[row] = sum; 890 } 891 892 /* backward solve the upper triangular */ 893 for (row=n-1; row>=0; row--){ 894 i = rout[row]; /* permuted row */ 895 v = aa + a->diag[i] + 1; 896 vi = aj + a->diag[i] + 1; 897 nz = ai[i+1] - a->diag[i] - 1; 898 sum = tmp[row]; 899 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 900 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 901 } 902 903 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 904 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 905 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 906 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 907 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 /* ----------------------------------------------------------- */ 912 #undef __FUNCT__ 913 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 914 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 915 { 916 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 917 PetscErrorCode ierr; 918 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 919 PetscScalar *x,*b; 920 const MatScalar *aa = a->a; 921 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 922 PetscInt adiag_i,i,*vi,nz,ai_i; 923 const MatScalar *v; 924 PetscScalar sum; 925 #endif 926 927 PetscFunctionBegin; 928 if (!n) PetscFunctionReturn(0); 929 930 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 931 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 932 933 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 934 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 935 #else 936 /* forward solve the lower triangular */ 937 x[0] = b[0]; 938 for (i=1; i<n; i++) { 939 ai_i = ai[i]; 940 v = aa + ai_i; 941 vi = aj + ai_i; 942 nz = adiag[i] - ai_i; 943 sum = b[i]; 944 while (nz--) sum -= *v++ * x[*vi++]; 945 x[i] = sum; 946 } 947 948 /* backward solve the upper triangular */ 949 for (i=n-1; i>=0; i--){ 950 adiag_i = adiag[i]; 951 v = aa + adiag_i + 1; 952 vi = aj + adiag_i + 1; 953 nz = ai[i+1] - adiag_i - 1; 954 sum = x[i]; 955 while (nz--) sum -= *v++ * x[*vi++]; 956 x[i] = sum*aa[adiag_i]; 957 } 958 #endif 959 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 960 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 961 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 965 #undef __FUNCT__ 966 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 967 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 968 { 969 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 970 IS iscol = a->col,isrow = a->row; 971 PetscErrorCode ierr; 972 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 973 PetscInt nz,*rout,*cout; 974 PetscScalar *x,*b,*tmp,sum; 975 const MatScalar *aa = a->a,*v; 976 977 PetscFunctionBegin; 978 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 979 980 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 981 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 982 tmp = a->solve_work; 983 984 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 985 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 986 987 /* forward solve the lower triangular */ 988 tmp[0] = b[*r++]; 989 for (i=1; i<n; i++) { 990 v = aa + ai[i] ; 991 vi = aj + ai[i] ; 992 nz = a->diag[i] - ai[i]; 993 sum = b[*r++]; 994 while (nz--) sum -= *v++ * tmp[*vi++ ]; 995 tmp[i] = sum; 996 } 997 998 /* backward solve the upper triangular */ 999 for (i=n-1; i>=0; i--){ 1000 v = aa + a->diag[i] + 1; 1001 vi = aj + a->diag[i] + 1; 1002 nz = ai[i+1] - a->diag[i] - 1; 1003 sum = tmp[i]; 1004 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1005 tmp[i] = sum*aa[a->diag[i]]; 1006 x[*c--] += tmp[i]; 1007 } 1008 1009 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1010 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1011 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1012 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1013 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1014 1015 PetscFunctionReturn(0); 1016 } 1017 /* -------------------------------------------------------------------*/ 1018 #undef __FUNCT__ 1019 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1020 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1021 { 1022 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1023 IS iscol = a->col,isrow = a->row; 1024 PetscErrorCode ierr; 1025 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1026 PetscInt nz,*rout,*cout,*diag = a->diag; 1027 PetscScalar *x,*b,*tmp,s1; 1028 const MatScalar *aa = a->a,*v; 1029 1030 PetscFunctionBegin; 1031 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1032 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1033 tmp = a->solve_work; 1034 1035 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1036 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1037 1038 /* copy the b into temp work space according to permutation */ 1039 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1040 1041 /* forward solve the U^T */ 1042 for (i=0; i<n; i++) { 1043 v = aa + diag[i] ; 1044 vi = aj + diag[i] + 1; 1045 nz = ai[i+1] - diag[i] - 1; 1046 s1 = tmp[i]; 1047 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1048 while (nz--) { 1049 tmp[*vi++ ] -= (*v++)*s1; 1050 } 1051 tmp[i] = s1; 1052 } 1053 1054 /* backward solve the L^T */ 1055 for (i=n-1; i>=0; i--){ 1056 v = aa + diag[i] - 1 ; 1057 vi = aj + diag[i] - 1 ; 1058 nz = diag[i] - ai[i]; 1059 s1 = tmp[i]; 1060 while (nz--) { 1061 tmp[*vi-- ] -= (*v--)*s1; 1062 } 1063 } 1064 1065 /* copy tmp into x according to permutation */ 1066 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1067 1068 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1069 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1070 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1072 1073 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1074 PetscFunctionReturn(0); 1075 } 1076 1077 #undef __FUNCT__ 1078 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1079 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1080 { 1081 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1082 IS iscol = a->col,isrow = a->row; 1083 PetscErrorCode ierr; 1084 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1085 PetscInt nz,*rout,*cout,*diag = a->diag; 1086 PetscScalar *x,*b,*tmp; 1087 const MatScalar *aa = a->a,*v; 1088 1089 PetscFunctionBegin; 1090 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1091 1092 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1093 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1094 tmp = a->solve_work; 1095 1096 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1097 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1098 1099 /* copy the b into temp work space according to permutation */ 1100 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1101 1102 /* forward solve the U^T */ 1103 for (i=0; i<n; i++) { 1104 v = aa + diag[i] ; 1105 vi = aj + diag[i] + 1; 1106 nz = ai[i+1] - diag[i] - 1; 1107 tmp[i] *= *v++; 1108 while (nz--) { 1109 tmp[*vi++ ] -= (*v++)*tmp[i]; 1110 } 1111 } 1112 1113 /* backward solve the L^T */ 1114 for (i=n-1; i>=0; i--){ 1115 v = aa + diag[i] - 1 ; 1116 vi = aj + diag[i] - 1 ; 1117 nz = diag[i] - ai[i]; 1118 while (nz--) { 1119 tmp[*vi-- ] -= (*v--)*tmp[i]; 1120 } 1121 } 1122 1123 /* copy tmp into x according to permutation */ 1124 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1125 1126 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1127 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1128 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1129 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1130 1131 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 /* ----------------------------------------------------------------*/ 1135 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1136 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*); 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1140 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1141 { 1142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1143 IS isicol; 1144 PetscErrorCode ierr; 1145 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1146 PetscInt *bi,*cols,nnz,*cols_lvl; 1147 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1148 PetscInt i,levels,diagonal_fill; 1149 PetscTruth col_identity,row_identity; 1150 PetscReal f; 1151 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1152 PetscBT lnkbt; 1153 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1154 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1155 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1156 PetscTruth missing; 1157 1158 PetscFunctionBegin; 1159 f = info->fill; 1160 levels = (PetscInt)info->levels; 1161 diagonal_fill = (PetscInt)info->diagonal_fill; 1162 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1163 1164 /* special case that simply copies fill pattern */ 1165 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1166 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1167 if (!levels && row_identity && col_identity) { 1168 ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1169 (*fact)->factor = FACTOR_LU; 1170 (*fact)->info.factor_mallocs = 0; 1171 (*fact)->info.fill_ratio_given = info->fill; 1172 (*fact)->info.fill_ratio_needed = 1.0; 1173 b = (Mat_SeqAIJ*)(*fact)->data; 1174 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1175 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1176 b->row = isrow; 1177 b->col = iscol; 1178 b->icol = isicol; 1179 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1180 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1181 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1183 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1184 PetscFunctionReturn(0); 1185 } 1186 1187 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1188 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1189 1190 /* get new row pointers */ 1191 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1192 bi[0] = 0; 1193 /* bdiag is location of diagonal in factor */ 1194 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1195 bdiag[0] = 0; 1196 1197 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1198 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1199 1200 /* create a linked list for storing column indices of the active row */ 1201 nlnk = n + 1; 1202 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1203 1204 /* initial FreeSpace size is f*(ai[n]+1) */ 1205 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1206 current_space = free_space; 1207 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1208 current_space_lvl = free_space_lvl; 1209 1210 for (i=0; i<n; i++) { 1211 nzi = 0; 1212 /* copy current row into linked list */ 1213 nnz = ai[r[i]+1] - ai[r[i]]; 1214 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1215 cols = aj + ai[r[i]]; 1216 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1217 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1218 nzi += nlnk; 1219 1220 /* make sure diagonal entry is included */ 1221 if (diagonal_fill && lnk[i] == -1) { 1222 fm = n; 1223 while (lnk[fm] < i) fm = lnk[fm]; 1224 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1225 lnk[fm] = i; 1226 lnk_lvl[i] = 0; 1227 nzi++; dcount++; 1228 } 1229 1230 /* add pivot rows into the active row */ 1231 nzbd = 0; 1232 prow = lnk[n]; 1233 while (prow < i) { 1234 nnz = bdiag[prow]; 1235 cols = bj_ptr[prow] + nnz + 1; 1236 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1237 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1238 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1239 nzi += nlnk; 1240 prow = lnk[prow]; 1241 nzbd++; 1242 } 1243 bdiag[i] = nzbd; 1244 bi[i+1] = bi[i] + nzi; 1245 1246 /* if free space is not available, make more free space */ 1247 if (current_space->local_remaining<nzi) { 1248 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1249 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1250 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1251 reallocs++; 1252 } 1253 1254 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1255 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1256 bj_ptr[i] = current_space->array; 1257 bjlvl_ptr[i] = current_space_lvl->array; 1258 1259 /* make sure the active row i has diagonal entry */ 1260 if (*(bj_ptr[i]+bdiag[i]) != i) { 1261 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1262 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1263 } 1264 1265 current_space->array += nzi; 1266 current_space->local_used += nzi; 1267 current_space->local_remaining -= nzi; 1268 current_space_lvl->array += nzi; 1269 current_space_lvl->local_used += nzi; 1270 current_space_lvl->local_remaining -= nzi; 1271 } 1272 1273 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1274 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1275 1276 /* destroy list of free space and other temporary arrays */ 1277 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1278 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1279 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1280 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1281 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1282 1283 #if defined(PETSC_USE_INFO) 1284 { 1285 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1286 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1287 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1288 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1289 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1290 if (diagonal_fill) { 1291 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1292 } 1293 } 1294 #endif 1295 1296 /* put together the new matrix */ 1297 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1298 b = (Mat_SeqAIJ*)(*fact)->data; 1299 b->free_a = PETSC_TRUE; 1300 b->free_ij = PETSC_TRUE; 1301 b->singlemalloc = PETSC_FALSE; 1302 len = (bi[n] )*sizeof(PetscScalar); 1303 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1304 b->j = bj; 1305 b->i = bi; 1306 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1307 b->diag = bdiag; 1308 b->ilen = 0; 1309 b->imax = 0; 1310 b->row = isrow; 1311 b->col = iscol; 1312 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1313 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1314 b->icol = isicol; 1315 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1316 /* In b structure: Free imax, ilen, old a, old j. 1317 Allocate bdiag, solve_work, new a, new j */ 1318 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1319 b->maxnz = b->nz = bi[n] ; 1320 (*fact)->factor = FACTOR_LU; 1321 (*fact)->info.factor_mallocs = reallocs; 1322 (*fact)->info.fill_ratio_given = f; 1323 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1324 1325 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1326 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1327 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #include "src/mat/impls/sbaij/seq/sbaij.h" 1332 #undef __FUNCT__ 1333 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1334 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1335 { 1336 Mat C = *B; 1337 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1338 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1339 IS ip=b->row,iip = b->icol; 1340 PetscErrorCode ierr; 1341 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1342 PetscInt *ai=a->i,*aj=a->j; 1343 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1344 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1345 PetscReal zeropivot,rs,shiftnz; 1346 PetscReal shiftpd; 1347 ChShift_Ctx sctx; 1348 PetscInt newshift; 1349 1350 PetscFunctionBegin; 1351 1352 shiftnz = info->shiftnz; 1353 shiftpd = info->shiftpd; 1354 zeropivot = info->zeropivot; 1355 1356 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1357 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1358 1359 /* initialization */ 1360 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1361 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1362 jl = il + mbs; 1363 rtmp = (MatScalar*)(jl + mbs); 1364 1365 sctx.shift_amount = 0; 1366 sctx.nshift = 0; 1367 do { 1368 sctx.chshift = PETSC_FALSE; 1369 for (i=0; i<mbs; i++) { 1370 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1371 } 1372 1373 for (k = 0; k<mbs; k++){ 1374 bval = ba + bi[k]; 1375 /* initialize k-th row by the perm[k]-th row of A */ 1376 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1377 for (j = jmin; j < jmax; j++){ 1378 col = riip[aj[j]]; 1379 if (col >= k){ /* only take upper triangular entry */ 1380 rtmp[col] = aa[j]; 1381 *bval++ = 0.0; /* for in-place factorization */ 1382 } 1383 } 1384 /* shift the diagonal of the matrix */ 1385 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1386 1387 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1388 dk = rtmp[k]; 1389 i = jl[k]; /* first row to be added to k_th row */ 1390 1391 while (i < k){ 1392 nexti = jl[i]; /* next row to be added to k_th row */ 1393 1394 /* compute multiplier, update diag(k) and U(i,k) */ 1395 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1396 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1397 dk += uikdi*ba[ili]; 1398 ba[ili] = uikdi; /* -U(i,k) */ 1399 1400 /* add multiple of row i to k-th row */ 1401 jmin = ili + 1; jmax = bi[i+1]; 1402 if (jmin < jmax){ 1403 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1404 /* update il and jl for row i */ 1405 il[i] = jmin; 1406 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1407 } 1408 i = nexti; 1409 } 1410 1411 /* shift the diagonals when zero pivot is detected */ 1412 /* compute rs=sum of abs(off-diagonal) */ 1413 rs = 0.0; 1414 jmin = bi[k]+1; 1415 nz = bi[k+1] - jmin; 1416 bcol = bj + jmin; 1417 while (nz--){ 1418 rs += PetscAbsScalar(rtmp[*bcol]); 1419 bcol++; 1420 } 1421 1422 sctx.rs = rs; 1423 sctx.pv = dk; 1424 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1425 1426 if (newshift == 1) { 1427 if (!sctx.shift_amount) { 1428 sctx.shift_amount = 1e-5; 1429 } 1430 break; 1431 } 1432 1433 /* copy data into U(k,:) */ 1434 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1435 jmin = bi[k]+1; jmax = bi[k+1]; 1436 if (jmin < jmax) { 1437 for (j=jmin; j<jmax; j++){ 1438 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1439 } 1440 /* add the k-th row into il and jl */ 1441 il[k] = jmin; 1442 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1443 } 1444 } 1445 } while (sctx.chshift); 1446 ierr = PetscFree(il);CHKERRQ(ierr); 1447 1448 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1449 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1450 C->factor = FACTOR_CHOLESKY; 1451 C->assembled = PETSC_TRUE; 1452 C->preallocated = PETSC_TRUE; 1453 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1454 if (sctx.nshift){ 1455 if (shiftnz) { 1456 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1457 } else if (shiftpd) { 1458 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1459 } 1460 } 1461 PetscFunctionReturn(0); 1462 } 1463 1464 #undef __FUNCT__ 1465 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1466 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1467 { 1468 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1469 Mat_SeqSBAIJ *b; 1470 PetscErrorCode ierr; 1471 PetscTruth perm_identity,missing; 1472 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1473 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1474 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1475 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1476 PetscReal fill=info->fill,levels=info->levels; 1477 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1478 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1479 PetscBT lnkbt; 1480 IS iperm; 1481 1482 PetscFunctionBegin; 1483 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1484 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1485 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1486 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1487 1488 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1489 ui[0] = 0; 1490 1491 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1492 if (!levels && perm_identity) { 1493 1494 for (i=0; i<am; i++) { 1495 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1496 } 1497 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1498 cols = uj; 1499 for (i=0; i<am; i++) { 1500 aj = a->j + a->diag[i]; 1501 ncols = ui[i+1] - ui[i]; 1502 for (j=0; j<ncols; j++) *cols++ = *aj++; 1503 } 1504 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1505 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1506 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1507 1508 /* initialization */ 1509 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1510 1511 /* jl: linked list for storing indices of the pivot rows 1512 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1513 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1514 il = jl + am; 1515 uj_ptr = (PetscInt**)(il + am); 1516 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1517 for (i=0; i<am; i++){ 1518 jl[i] = am; il[i] = 0; 1519 } 1520 1521 /* create and initialize a linked list for storing column indices of the active row k */ 1522 nlnk = am + 1; 1523 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1524 1525 /* initial FreeSpace size is fill*(ai[am]+1) */ 1526 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1527 current_space = free_space; 1528 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1529 current_space_lvl = free_space_lvl; 1530 1531 for (k=0; k<am; k++){ /* for each active row k */ 1532 /* initialize lnk by the column indices of row rip[k] of A */ 1533 nzk = 0; 1534 ncols = ai[rip[k]+1] - ai[rip[k]]; 1535 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1536 ncols_upper = 0; 1537 for (j=0; j<ncols; j++){ 1538 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1539 if (riip[i] >= k){ /* only take upper triangular entry */ 1540 ajtmp[ncols_upper] = i; 1541 ncols_upper++; 1542 } 1543 } 1544 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1545 nzk += nlnk; 1546 1547 /* update lnk by computing fill-in for each pivot row to be merged in */ 1548 prow = jl[k]; /* 1st pivot row */ 1549 1550 while (prow < k){ 1551 nextprow = jl[prow]; 1552 1553 /* merge prow into k-th row */ 1554 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1555 jmax = ui[prow+1]; 1556 ncols = jmax-jmin; 1557 i = jmin - ui[prow]; 1558 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1559 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1560 j = *(uj - 1); 1561 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1562 nzk += nlnk; 1563 1564 /* update il and jl for prow */ 1565 if (jmin < jmax){ 1566 il[prow] = jmin; 1567 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1568 } 1569 prow = nextprow; 1570 } 1571 1572 /* if free space is not available, make more free space */ 1573 if (current_space->local_remaining<nzk) { 1574 i = am - k + 1; /* num of unfactored rows */ 1575 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1576 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1577 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1578 reallocs++; 1579 } 1580 1581 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1582 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1583 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1584 1585 /* add the k-th row into il and jl */ 1586 if (nzk > 1){ 1587 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1588 jl[k] = jl[i]; jl[i] = k; 1589 il[k] = ui[k] + 1; 1590 } 1591 uj_ptr[k] = current_space->array; 1592 uj_lvl_ptr[k] = current_space_lvl->array; 1593 1594 current_space->array += nzk; 1595 current_space->local_used += nzk; 1596 current_space->local_remaining -= nzk; 1597 1598 current_space_lvl->array += nzk; 1599 current_space_lvl->local_used += nzk; 1600 current_space_lvl->local_remaining -= nzk; 1601 1602 ui[k+1] = ui[k] + nzk; 1603 } 1604 1605 #if defined(PETSC_USE_INFO) 1606 if (ai[am] != 0) { 1607 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1608 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1609 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1610 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1611 } else { 1612 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1613 } 1614 #endif 1615 1616 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1617 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1618 ierr = PetscFree(jl);CHKERRQ(ierr); 1619 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1620 1621 /* destroy list of free space and other temporary array(s) */ 1622 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1623 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1624 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1625 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1626 1627 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1628 1629 /* put together the new matrix in MATSEQSBAIJ format */ 1630 1631 b = (Mat_SeqSBAIJ*)(*fact)->data; 1632 b->singlemalloc = PETSC_FALSE; 1633 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1634 b->j = uj; 1635 b->i = ui; 1636 b->diag = 0; 1637 b->ilen = 0; 1638 b->imax = 0; 1639 b->row = perm; 1640 b->col = perm; 1641 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1642 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1643 b->icol = iperm; 1644 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1645 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1646 ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1647 b->maxnz = b->nz = ui[am]; 1648 b->free_a = PETSC_TRUE; 1649 b->free_ij = PETSC_TRUE; 1650 1651 (*fact)->factor = FACTOR_CHOLESKY; 1652 (*fact)->info.factor_mallocs = reallocs; 1653 (*fact)->info.fill_ratio_given = fill; 1654 if (ai[am] != 0) { 1655 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1656 } else { 1657 (*fact)->info.fill_ratio_needed = 0.0; 1658 } 1659 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1660 if (perm_identity){ 1661 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1662 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1663 } 1664 PetscFunctionReturn(0); 1665 } 1666 1667 #undef __FUNCT__ 1668 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1669 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1670 { 1671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1672 Mat_SeqSBAIJ *b; 1673 PetscErrorCode ierr; 1674 PetscTruth perm_identity; 1675 PetscReal fill = info->fill; 1676 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1677 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1678 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1679 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1680 PetscBT lnkbt; 1681 IS iperm; 1682 1683 PetscFunctionBegin; 1684 /* check whether perm is the identity mapping */ 1685 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1686 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1687 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1688 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1689 1690 /* initialization */ 1691 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1692 ui[0] = 0; 1693 1694 /* jl: linked list for storing indices of the pivot rows 1695 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1696 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1697 il = jl + am; 1698 cols = il + am; 1699 ui_ptr = (PetscInt**)(cols + am); 1700 for (i=0; i<am; i++){ 1701 jl[i] = am; il[i] = 0; 1702 } 1703 1704 /* create and initialize a linked list for storing column indices of the active row k */ 1705 nlnk = am + 1; 1706 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1707 1708 /* initial FreeSpace size is fill*(ai[am]+1) */ 1709 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1710 current_space = free_space; 1711 1712 for (k=0; k<am; k++){ /* for each active row k */ 1713 /* initialize lnk by the column indices of row rip[k] of A */ 1714 nzk = 0; 1715 ncols = ai[rip[k]+1] - ai[rip[k]]; 1716 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1717 ncols_upper = 0; 1718 for (j=0; j<ncols; j++){ 1719 i = riip[*(aj + ai[rip[k]] + j)]; 1720 if (i >= k){ /* only take upper triangular entry */ 1721 cols[ncols_upper] = i; 1722 ncols_upper++; 1723 } 1724 } 1725 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1726 nzk += nlnk; 1727 1728 /* update lnk by computing fill-in for each pivot row to be merged in */ 1729 prow = jl[k]; /* 1st pivot row */ 1730 1731 while (prow < k){ 1732 nextprow = jl[prow]; 1733 /* merge prow into k-th row */ 1734 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1735 jmax = ui[prow+1]; 1736 ncols = jmax-jmin; 1737 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1738 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1739 nzk += nlnk; 1740 1741 /* update il and jl for prow */ 1742 if (jmin < jmax){ 1743 il[prow] = jmin; 1744 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1745 } 1746 prow = nextprow; 1747 } 1748 1749 /* if free space is not available, make more free space */ 1750 if (current_space->local_remaining<nzk) { 1751 i = am - k + 1; /* num of unfactored rows */ 1752 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1753 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1754 reallocs++; 1755 } 1756 1757 /* copy data into free space, then initialize lnk */ 1758 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1759 1760 /* add the k-th row into il and jl */ 1761 if (nzk-1 > 0){ 1762 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1763 jl[k] = jl[i]; jl[i] = k; 1764 il[k] = ui[k] + 1; 1765 } 1766 ui_ptr[k] = current_space->array; 1767 current_space->array += nzk; 1768 current_space->local_used += nzk; 1769 current_space->local_remaining -= nzk; 1770 1771 ui[k+1] = ui[k] + nzk; 1772 } 1773 1774 #if defined(PETSC_USE_INFO) 1775 if (ai[am] != 0) { 1776 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1777 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1778 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1779 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1780 } else { 1781 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1782 } 1783 #endif 1784 1785 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1786 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1787 ierr = PetscFree(jl);CHKERRQ(ierr); 1788 1789 /* destroy list of free space and other temporary array(s) */ 1790 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1791 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1792 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1793 1794 /* put together the new matrix in MATSEQSBAIJ format */ 1795 1796 b = (Mat_SeqSBAIJ*)(*fact)->data; 1797 b->singlemalloc = PETSC_FALSE; 1798 b->free_a = PETSC_TRUE; 1799 b->free_ij = PETSC_TRUE; 1800 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1801 b->j = uj; 1802 b->i = ui; 1803 b->diag = 0; 1804 b->ilen = 0; 1805 b->imax = 0; 1806 b->row = perm; 1807 b->col = perm; 1808 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1809 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1810 b->icol = iperm; 1811 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1812 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1813 ierr = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1814 b->maxnz = b->nz = ui[am]; 1815 1816 (*fact)->factor = FACTOR_CHOLESKY; 1817 (*fact)->info.factor_mallocs = reallocs; 1818 (*fact)->info.fill_ratio_given = fill; 1819 if (ai[am] != 0) { 1820 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1821 } else { 1822 (*fact)->info.fill_ratio_needed = 0.0; 1823 } 1824 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1825 if (perm_identity){ 1826 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1827 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1828 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1829 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1830 } else { 1831 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1832 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1833 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1834 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838