1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = MAT_FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 260 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 261 { 262 PetscInt n = A->rmap.n; 263 PetscErrorCode ierr; 264 265 PetscFunctionBegin; 266 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 267 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 268 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 269 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 270 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 271 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 272 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 273 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 274 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 275 (*B)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 276 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 282 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 283 { 284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 285 IS isicol; 286 PetscErrorCode ierr; 287 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 288 PetscInt *bi,*bj,*ajtmp; 289 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 290 PetscReal f; 291 PetscInt nlnk,*lnk,k,**bi_ptr; 292 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 293 PetscBT lnkbt; 294 295 PetscFunctionBegin; 296 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 297 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 298 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 299 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 300 301 /* get new row pointers */ 302 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 303 bi[0] = 0; 304 305 /* bdiag is location of diagonal in factor */ 306 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 307 bdiag[0] = 0; 308 309 /* linked list for storing column indices of the active row */ 310 nlnk = n + 1; 311 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 312 313 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 314 315 /* initial FreeSpace size is f*(ai[n]+1) */ 316 f = info->fill; 317 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 318 current_space = free_space; 319 320 for (i=0; i<n; i++) { 321 /* copy previous fill into linked list */ 322 nzi = 0; 323 nnz = ai[r[i]+1] - ai[r[i]]; 324 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 325 ajtmp = aj + ai[r[i]]; 326 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 327 nzi += nlnk; 328 329 /* add pivot rows into linked list */ 330 row = lnk[n]; 331 while (row < i) { 332 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 333 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 334 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 335 nzi += nlnk; 336 row = lnk[row]; 337 } 338 bi[i+1] = bi[i] + nzi; 339 im[i] = nzi; 340 341 /* mark bdiag */ 342 nzbd = 0; 343 nnz = nzi; 344 k = lnk[n]; 345 while (nnz-- && k < i){ 346 nzbd++; 347 k = lnk[k]; 348 } 349 bdiag[i] = bi[i] + nzbd; 350 351 /* if free space is not available, make more free space */ 352 if (current_space->local_remaining<nzi) { 353 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 354 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 355 reallocs++; 356 } 357 358 /* copy data into free space, then initialize lnk */ 359 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 360 bi_ptr[i] = current_space->array; 361 current_space->array += nzi; 362 current_space->local_used += nzi; 363 current_space->local_remaining -= nzi; 364 } 365 #if defined(PETSC_USE_INFO) 366 if (ai[n] != 0) { 367 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 368 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 369 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 370 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 371 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 372 } else { 373 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 374 } 375 #endif 376 377 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 378 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 379 380 /* destroy list of free space and other temporary array(s) */ 381 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 382 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 383 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 384 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 385 386 /* put together the new matrix */ 387 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 388 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 389 b = (Mat_SeqAIJ*)(*B)->data; 390 b->free_a = PETSC_TRUE; 391 b->free_ij = PETSC_TRUE; 392 b->singlemalloc = PETSC_FALSE; 393 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = 0; 398 b->imax = 0; 399 b->row = isrow; 400 b->col = iscol; 401 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 402 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 403 b->icol = isicol; 404 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 408 b->maxnz = b->nz = bi[n] ; 409 410 (*B)->factor = MAT_FACTOR_LU; 411 (*B)->info.factor_mallocs = reallocs; 412 (*B)->info.fill_ratio_given = f; 413 414 if (ai[n] != 0) { 415 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 416 } else { 417 (*B)->info.fill_ratio_needed = 0.0; 418 } 419 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 420 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 421 PetscFunctionReturn(0); 422 } 423 424 /* 425 Trouble in factorization, should we dump the original matrix? 426 */ 427 #undef __FUNCT__ 428 #define __FUNCT__ "MatFactorDumpMatrix" 429 PetscErrorCode MatFactorDumpMatrix(Mat A) 430 { 431 PetscErrorCode ierr; 432 PetscTruth flg; 433 434 PetscFunctionBegin; 435 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 436 if (flg) { 437 PetscViewer viewer; 438 char filename[PETSC_MAX_PATH_LEN]; 439 440 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 441 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 442 ierr = MatView(A,viewer);CHKERRQ(ierr); 443 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 444 } 445 PetscFunctionReturn(0); 446 } 447 448 /* ----------------------------------------------------------- */ 449 #undef __FUNCT__ 450 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 451 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 452 { 453 Mat C=*B; 454 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 455 IS isrow = b->row,isicol = b->icol; 456 PetscErrorCode ierr; 457 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 458 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 459 PetscInt *diag_offset = b->diag,diag,*pj; 460 PetscScalar *rtmp,*pc,multiplier,*rtmps; 461 MatScalar *v,*pv; 462 PetscScalar d; 463 PetscReal rs; 464 LUShift_Ctx sctx; 465 PetscInt newshift,*ddiag; 466 467 PetscFunctionBegin; 468 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 469 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 470 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 471 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 472 rtmps = rtmp; ics = ic; 473 474 sctx.shift_top = 0; 475 sctx.nshift_max = 0; 476 sctx.shift_lo = 0; 477 sctx.shift_hi = 0; 478 479 /* if both shift schemes are chosen by user, only use info->shiftpd */ 480 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 481 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 482 PetscInt *aai = a->i; 483 ddiag = a->diag; 484 sctx.shift_top = 0; 485 for (i=0; i<n; i++) { 486 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 487 d = (a->a)[ddiag[i]]; 488 rs = -PetscAbsScalar(d) - PetscRealPart(d); 489 v = a->a+aai[i]; 490 nz = aai[i+1] - aai[i]; 491 for (j=0; j<nz; j++) 492 rs += PetscAbsScalar(v[j]); 493 if (rs>sctx.shift_top) sctx.shift_top = rs; 494 } 495 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 496 sctx.shift_top *= 1.1; 497 sctx.nshift_max = 5; 498 sctx.shift_lo = 0.; 499 sctx.shift_hi = 1.; 500 } 501 502 sctx.shift_amount = 0; 503 sctx.nshift = 0; 504 do { 505 sctx.lushift = PETSC_FALSE; 506 for (i=0; i<n; i++){ 507 nz = bi[i+1] - bi[i]; 508 bjtmp = bj + bi[i]; 509 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 510 511 /* load in initial (unfactored row) */ 512 nz = a->i[r[i]+1] - a->i[r[i]]; 513 ajtmp = a->j + a->i[r[i]]; 514 v = a->a + a->i[r[i]]; 515 for (j=0; j<nz; j++) { 516 rtmp[ics[ajtmp[j]]] = v[j]; 517 } 518 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 519 520 row = *bjtmp++; 521 while (row < i) { 522 pc = rtmp + row; 523 if (*pc != 0.0) { 524 pv = b->a + diag_offset[row]; 525 pj = b->j + diag_offset[row] + 1; 526 multiplier = *pc / *pv++; 527 *pc = multiplier; 528 nz = bi[row+1] - diag_offset[row] - 1; 529 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 530 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 531 } 532 row = *bjtmp++; 533 } 534 /* finished row so stick it into b->a */ 535 pv = b->a + bi[i] ; 536 pj = b->j + bi[i] ; 537 nz = bi[i+1] - bi[i]; 538 diag = diag_offset[i] - bi[i]; 539 rs = 0.0; 540 for (j=0; j<nz; j++) { 541 pv[j] = rtmps[pj[j]]; 542 if (j != diag) rs += PetscAbsScalar(pv[j]); 543 } 544 545 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 546 sctx.rs = rs; 547 sctx.pv = pv[diag]; 548 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 549 if (newshift == 1) break; 550 } 551 552 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 553 /* 554 * if no shift in this attempt & shifting & started shifting & can refine, 555 * then try lower shift 556 */ 557 sctx.shift_hi = info->shift_fraction; 558 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 559 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 560 sctx.lushift = PETSC_TRUE; 561 sctx.nshift++; 562 } 563 } while (sctx.lushift); 564 565 /* invert diagonal entries for simplier triangular solves */ 566 for (i=0; i<n; i++) { 567 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 568 } 569 ierr = PetscFree(rtmp);CHKERRQ(ierr); 570 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 571 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 572 C->factor = MAT_FACTOR_LU; 573 C->assembled = PETSC_TRUE; 574 C->preallocated = PETSC_TRUE; 575 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 576 if (sctx.nshift){ 577 if (info->shiftnz) { 578 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 579 } else if (info->shiftpd) { 580 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 581 } 582 } 583 PetscFunctionReturn(0); 584 } 585 586 /* 587 This routine implements inplace ILU(0) with row or/and column permutations. 588 Input: 589 A - original matrix 590 Output; 591 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 592 a->j (col index) is permuted by the inverse of colperm, then sorted 593 a->a reordered accordingly with a->j 594 a->diag (ptr to diagonal elements) is updated. 595 */ 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 598 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 599 { 600 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 601 IS isrow = a->row,isicol = a->icol; 602 PetscErrorCode ierr; 603 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 604 PetscInt *ajtmp,nz,row,*ics; 605 PetscInt *diag = a->diag,nbdiag,*pj; 606 PetscScalar *rtmp,*pc,multiplier,d; 607 MatScalar *v,*pv; 608 PetscReal rs; 609 LUShift_Ctx sctx; 610 PetscInt newshift; 611 612 PetscFunctionBegin; 613 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 614 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 615 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 616 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 617 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 618 ics = ic; 619 620 sctx.shift_top = 0; 621 sctx.nshift_max = 0; 622 sctx.shift_lo = 0; 623 sctx.shift_hi = 0; 624 625 /* if both shift schemes are chosen by user, only use info->shiftpd */ 626 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 627 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 628 sctx.shift_top = 0; 629 for (i=0; i<n; i++) { 630 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 631 d = (a->a)[diag[i]]; 632 rs = -PetscAbsScalar(d) - PetscRealPart(d); 633 v = a->a+ai[i]; 634 nz = ai[i+1] - ai[i]; 635 for (j=0; j<nz; j++) 636 rs += PetscAbsScalar(v[j]); 637 if (rs>sctx.shift_top) sctx.shift_top = rs; 638 } 639 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 640 sctx.shift_top *= 1.1; 641 sctx.nshift_max = 5; 642 sctx.shift_lo = 0.; 643 sctx.shift_hi = 1.; 644 } 645 646 sctx.shift_amount = 0; 647 sctx.nshift = 0; 648 do { 649 sctx.lushift = PETSC_FALSE; 650 for (i=0; i<n; i++){ 651 /* load in initial unfactored row */ 652 nz = ai[r[i]+1] - ai[r[i]]; 653 ajtmp = aj + ai[r[i]]; 654 v = a->a + ai[r[i]]; 655 /* sort permuted ajtmp and values v accordingly */ 656 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 657 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 658 659 diag[r[i]] = ai[r[i]]; 660 for (j=0; j<nz; j++) { 661 rtmp[ajtmp[j]] = v[j]; 662 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 663 } 664 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 665 666 row = *ajtmp++; 667 while (row < i) { 668 pc = rtmp + row; 669 if (*pc != 0.0) { 670 pv = a->a + diag[r[row]]; 671 pj = aj + diag[r[row]] + 1; 672 673 multiplier = *pc / *pv++; 674 *pc = multiplier; 675 nz = ai[r[row]+1] - diag[r[row]] - 1; 676 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 677 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 678 } 679 row = *ajtmp++; 680 } 681 /* finished row so overwrite it onto a->a */ 682 pv = a->a + ai[r[i]] ; 683 pj = aj + ai[r[i]] ; 684 nz = ai[r[i]+1] - ai[r[i]]; 685 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 686 687 rs = 0.0; 688 for (j=0; j<nz; j++) { 689 pv[j] = rtmp[pj[j]]; 690 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 691 } 692 693 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 694 sctx.rs = rs; 695 sctx.pv = pv[nbdiag]; 696 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 697 if (newshift == 1) break; 698 } 699 700 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 701 /* 702 * if no shift in this attempt & shifting & started shifting & can refine, 703 * then try lower shift 704 */ 705 sctx.shift_hi = info->shift_fraction; 706 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 707 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 708 sctx.lushift = PETSC_TRUE; 709 sctx.nshift++; 710 } 711 } while (sctx.lushift); 712 713 /* invert diagonal entries for simplier triangular solves */ 714 for (i=0; i<n; i++) { 715 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 716 } 717 718 ierr = PetscFree(rtmp);CHKERRQ(ierr); 719 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 720 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 721 A->factor = MAT_FACTOR_LU; 722 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 723 A->assembled = PETSC_TRUE; 724 A->preallocated = PETSC_TRUE; 725 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 726 if (sctx.nshift){ 727 if (info->shiftnz) { 728 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 729 } else if (info->shiftpd) { 730 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 731 } 732 } 733 PetscFunctionReturn(0); 734 } 735 736 /* ----------------------------------------------------------- */ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatLUFactor_SeqAIJ" 739 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 740 { 741 PetscErrorCode ierr; 742 Mat C; 743 744 PetscFunctionBegin; 745 ierr = MatGetFactor(A,"petsc",MAT_FACTOR_LU,&C);CHKERRQ(ierr); 746 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 747 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 748 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 749 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 750 PetscFunctionReturn(0); 751 } 752 /* ----------------------------------------------------------- */ 753 #undef __FUNCT__ 754 #define __FUNCT__ "MatSolve_SeqAIJ" 755 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 756 { 757 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 758 IS iscol = a->col,isrow = a->row; 759 PetscErrorCode ierr; 760 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 761 PetscInt nz,*rout,*cout; 762 PetscScalar *x,*tmp,*tmps,sum; 763 const PetscScalar *b; 764 const MatScalar *aa = a->a,*v; 765 766 PetscFunctionBegin; 767 if (!n) PetscFunctionReturn(0); 768 769 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 770 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 771 tmp = a->solve_work; 772 773 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 774 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 775 776 /* forward solve the lower triangular */ 777 tmp[0] = b[*r++]; 778 tmps = tmp; 779 for (i=1; i<n; i++) { 780 v = aa + ai[i] ; 781 vi = aj + ai[i] ; 782 nz = a->diag[i] - ai[i]; 783 sum = b[*r++]; 784 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 785 tmp[i] = sum; 786 } 787 788 /* backward solve the upper triangular */ 789 for (i=n-1; i>=0; i--){ 790 v = aa + a->diag[i] + 1; 791 vi = aj + a->diag[i] + 1; 792 nz = ai[i+1] - a->diag[i] - 1; 793 sum = tmp[i]; 794 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 795 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 796 } 797 798 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 799 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 800 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 801 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 802 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 803 PetscFunctionReturn(0); 804 } 805 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatMatSolve_SeqAIJ" 808 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 809 { 810 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 811 IS iscol = a->col,isrow = a->row; 812 PetscErrorCode ierr; 813 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 814 PetscInt nz,*rout,*cout,neq; 815 PetscScalar *x,*b,*tmp,*tmps,sum; 816 const MatScalar *aa = a->a,*v; 817 818 PetscFunctionBegin; 819 if (!n) PetscFunctionReturn(0); 820 821 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 822 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 823 824 tmp = a->solve_work; 825 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 826 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 827 828 for (neq=0; neq<n; neq++){ 829 /* forward solve the lower triangular */ 830 tmp[0] = b[r[0]]; 831 tmps = tmp; 832 for (i=1; i<n; i++) { 833 v = aa + ai[i] ; 834 vi = aj + ai[i] ; 835 nz = a->diag[i] - ai[i]; 836 sum = b[r[i]]; 837 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 838 tmp[i] = sum; 839 } 840 /* backward solve the upper triangular */ 841 for (i=n-1; i>=0; i--){ 842 v = aa + a->diag[i] + 1; 843 vi = aj + a->diag[i] + 1; 844 nz = ai[i+1] - a->diag[i] - 1; 845 sum = tmp[i]; 846 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 847 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 848 } 849 850 b += n; 851 x += n; 852 } 853 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 854 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 855 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 856 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 857 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 858 PetscFunctionReturn(0); 859 } 860 861 #undef __FUNCT__ 862 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 863 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 864 { 865 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 866 IS iscol = a->col,isrow = a->row; 867 PetscErrorCode ierr; 868 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 869 PetscInt nz,*rout,*cout,row; 870 PetscScalar *x,*b,*tmp,*tmps,sum; 871 const MatScalar *aa = a->a,*v; 872 873 PetscFunctionBegin; 874 if (!n) PetscFunctionReturn(0); 875 876 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 877 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 878 tmp = a->solve_work; 879 880 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 881 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 882 883 /* forward solve the lower triangular */ 884 tmp[0] = b[*r++]; 885 tmps = tmp; 886 for (row=1; row<n; row++) { 887 i = rout[row]; /* permuted row */ 888 v = aa + ai[i] ; 889 vi = aj + ai[i] ; 890 nz = a->diag[i] - ai[i]; 891 sum = b[*r++]; 892 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 893 tmp[row] = sum; 894 } 895 896 /* backward solve the upper triangular */ 897 for (row=n-1; row>=0; row--){ 898 i = rout[row]; /* permuted row */ 899 v = aa + a->diag[i] + 1; 900 vi = aj + a->diag[i] + 1; 901 nz = ai[i+1] - a->diag[i] - 1; 902 sum = tmp[row]; 903 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 904 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 905 } 906 907 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 908 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 909 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 910 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 911 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 /* ----------------------------------------------------------- */ 916 #undef __FUNCT__ 917 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 918 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 919 { 920 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 921 PetscErrorCode ierr; 922 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 923 PetscScalar *x,*b; 924 const MatScalar *aa = a->a; 925 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 926 PetscInt adiag_i,i,*vi,nz,ai_i; 927 const MatScalar *v; 928 PetscScalar sum; 929 #endif 930 931 PetscFunctionBegin; 932 if (!n) PetscFunctionReturn(0); 933 934 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 935 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 936 937 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 938 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 939 #else 940 /* forward solve the lower triangular */ 941 x[0] = b[0]; 942 for (i=1; i<n; i++) { 943 ai_i = ai[i]; 944 v = aa + ai_i; 945 vi = aj + ai_i; 946 nz = adiag[i] - ai_i; 947 sum = b[i]; 948 while (nz--) sum -= *v++ * x[*vi++]; 949 x[i] = sum; 950 } 951 952 /* backward solve the upper triangular */ 953 for (i=n-1; i>=0; i--){ 954 adiag_i = adiag[i]; 955 v = aa + adiag_i + 1; 956 vi = aj + adiag_i + 1; 957 nz = ai[i+1] - adiag_i - 1; 958 sum = x[i]; 959 while (nz--) sum -= *v++ * x[*vi++]; 960 x[i] = sum*aa[adiag_i]; 961 } 962 #endif 963 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 964 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 965 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 971 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 972 { 973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 974 IS iscol = a->col,isrow = a->row; 975 PetscErrorCode ierr; 976 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 977 PetscInt nz,*rout,*cout; 978 PetscScalar *x,*b,*tmp,sum; 979 const MatScalar *aa = a->a,*v; 980 981 PetscFunctionBegin; 982 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 983 984 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 tmp = a->solve_work; 987 988 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 989 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 990 991 /* forward solve the lower triangular */ 992 tmp[0] = b[*r++]; 993 for (i=1; i<n; i++) { 994 v = aa + ai[i] ; 995 vi = aj + ai[i] ; 996 nz = a->diag[i] - ai[i]; 997 sum = b[*r++]; 998 while (nz--) sum -= *v++ * tmp[*vi++ ]; 999 tmp[i] = sum; 1000 } 1001 1002 /* backward solve the upper triangular */ 1003 for (i=n-1; i>=0; i--){ 1004 v = aa + a->diag[i] + 1; 1005 vi = aj + a->diag[i] + 1; 1006 nz = ai[i+1] - a->diag[i] - 1; 1007 sum = tmp[i]; 1008 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1009 tmp[i] = sum*aa[a->diag[i]]; 1010 x[*c--] += tmp[i]; 1011 } 1012 1013 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1014 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1015 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1017 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1018 1019 PetscFunctionReturn(0); 1020 } 1021 /* -------------------------------------------------------------------*/ 1022 #undef __FUNCT__ 1023 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1024 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1025 { 1026 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1027 IS iscol = a->col,isrow = a->row; 1028 PetscErrorCode ierr; 1029 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1030 PetscInt nz,*rout,*cout,*diag = a->diag; 1031 PetscScalar *x,*b,*tmp,s1; 1032 const MatScalar *aa = a->a,*v; 1033 1034 PetscFunctionBegin; 1035 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1036 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1037 tmp = a->solve_work; 1038 1039 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1040 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1041 1042 /* copy the b into temp work space according to permutation */ 1043 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1044 1045 /* forward solve the U^T */ 1046 for (i=0; i<n; i++) { 1047 v = aa + diag[i] ; 1048 vi = aj + diag[i] + 1; 1049 nz = ai[i+1] - diag[i] - 1; 1050 s1 = tmp[i]; 1051 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1052 while (nz--) { 1053 tmp[*vi++ ] -= (*v++)*s1; 1054 } 1055 tmp[i] = s1; 1056 } 1057 1058 /* backward solve the L^T */ 1059 for (i=n-1; i>=0; i--){ 1060 v = aa + diag[i] - 1 ; 1061 vi = aj + diag[i] - 1 ; 1062 nz = diag[i] - ai[i]; 1063 s1 = tmp[i]; 1064 while (nz--) { 1065 tmp[*vi-- ] -= (*v--)*s1; 1066 } 1067 } 1068 1069 /* copy tmp into x according to permutation */ 1070 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1071 1072 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1073 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1074 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1075 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1076 1077 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1078 PetscFunctionReturn(0); 1079 } 1080 1081 #undef __FUNCT__ 1082 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1083 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1084 { 1085 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1086 IS iscol = a->col,isrow = a->row; 1087 PetscErrorCode ierr; 1088 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1089 PetscInt nz,*rout,*cout,*diag = a->diag; 1090 PetscScalar *x,*b,*tmp; 1091 const MatScalar *aa = a->a,*v; 1092 1093 PetscFunctionBegin; 1094 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1095 1096 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1097 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1098 tmp = a->solve_work; 1099 1100 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1101 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1102 1103 /* copy the b into temp work space according to permutation */ 1104 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1105 1106 /* forward solve the U^T */ 1107 for (i=0; i<n; i++) { 1108 v = aa + diag[i] ; 1109 vi = aj + diag[i] + 1; 1110 nz = ai[i+1] - diag[i] - 1; 1111 tmp[i] *= *v++; 1112 while (nz--) { 1113 tmp[*vi++ ] -= (*v++)*tmp[i]; 1114 } 1115 } 1116 1117 /* backward solve the L^T */ 1118 for (i=n-1; i>=0; i--){ 1119 v = aa + diag[i] - 1 ; 1120 vi = aj + diag[i] - 1 ; 1121 nz = diag[i] - ai[i]; 1122 while (nz--) { 1123 tmp[*vi-- ] -= (*v--)*tmp[i]; 1124 } 1125 } 1126 1127 /* copy tmp into x according to permutation */ 1128 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1129 1130 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1131 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1133 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1134 1135 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1136 PetscFunctionReturn(0); 1137 } 1138 /* ----------------------------------------------------------------*/ 1139 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1140 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,MatDuplicateOption,Mat*); 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1144 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1145 { 1146 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1147 IS isicol; 1148 PetscErrorCode ierr; 1149 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1150 PetscInt *bi,*cols,nnz,*cols_lvl; 1151 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1152 PetscInt i,levels,diagonal_fill; 1153 PetscTruth col_identity,row_identity; 1154 PetscReal f; 1155 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1156 PetscBT lnkbt; 1157 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1158 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1159 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1160 PetscTruth missing; 1161 1162 PetscFunctionBegin; 1163 f = info->fill; 1164 levels = (PetscInt)info->levels; 1165 diagonal_fill = (PetscInt)info->diagonal_fill; 1166 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1167 1168 /* special case that simply copies fill pattern */ 1169 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1170 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1171 if (!levels && row_identity && col_identity) { 1172 ierr = MatDuplicateNoCreate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1173 (*fact)->factor = MAT_FACTOR_LU; 1174 (*fact)->info.factor_mallocs = 0; 1175 (*fact)->info.fill_ratio_given = info->fill; 1176 (*fact)->info.fill_ratio_needed = 1.0; 1177 b = (Mat_SeqAIJ*)(*fact)->data; 1178 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1179 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1180 b->row = isrow; 1181 b->col = iscol; 1182 b->icol = isicol; 1183 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1184 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1185 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1186 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1187 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1192 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1193 1194 /* get new row pointers */ 1195 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1196 bi[0] = 0; 1197 /* bdiag is location of diagonal in factor */ 1198 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1199 bdiag[0] = 0; 1200 1201 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1202 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1203 1204 /* create a linked list for storing column indices of the active row */ 1205 nlnk = n + 1; 1206 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1207 1208 /* initial FreeSpace size is f*(ai[n]+1) */ 1209 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1210 current_space = free_space; 1211 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1212 current_space_lvl = free_space_lvl; 1213 1214 for (i=0; i<n; i++) { 1215 nzi = 0; 1216 /* copy current row into linked list */ 1217 nnz = ai[r[i]+1] - ai[r[i]]; 1218 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1219 cols = aj + ai[r[i]]; 1220 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1221 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1222 nzi += nlnk; 1223 1224 /* make sure diagonal entry is included */ 1225 if (diagonal_fill && lnk[i] == -1) { 1226 fm = n; 1227 while (lnk[fm] < i) fm = lnk[fm]; 1228 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1229 lnk[fm] = i; 1230 lnk_lvl[i] = 0; 1231 nzi++; dcount++; 1232 } 1233 1234 /* add pivot rows into the active row */ 1235 nzbd = 0; 1236 prow = lnk[n]; 1237 while (prow < i) { 1238 nnz = bdiag[prow]; 1239 cols = bj_ptr[prow] + nnz + 1; 1240 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1241 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1242 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1243 nzi += nlnk; 1244 prow = lnk[prow]; 1245 nzbd++; 1246 } 1247 bdiag[i] = nzbd; 1248 bi[i+1] = bi[i] + nzi; 1249 1250 /* if free space is not available, make more free space */ 1251 if (current_space->local_remaining<nzi) { 1252 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1253 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1254 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1255 reallocs++; 1256 } 1257 1258 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1259 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1260 bj_ptr[i] = current_space->array; 1261 bjlvl_ptr[i] = current_space_lvl->array; 1262 1263 /* make sure the active row i has diagonal entry */ 1264 if (*(bj_ptr[i]+bdiag[i]) != i) { 1265 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1266 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1267 } 1268 1269 current_space->array += nzi; 1270 current_space->local_used += nzi; 1271 current_space->local_remaining -= nzi; 1272 current_space_lvl->array += nzi; 1273 current_space_lvl->local_used += nzi; 1274 current_space_lvl->local_remaining -= nzi; 1275 } 1276 1277 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1278 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1279 1280 /* destroy list of free space and other temporary arrays */ 1281 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1282 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1283 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1284 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1285 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1286 1287 #if defined(PETSC_USE_INFO) 1288 { 1289 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1290 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1291 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1292 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1293 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1294 if (diagonal_fill) { 1295 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1296 } 1297 } 1298 #endif 1299 1300 /* put together the new matrix */ 1301 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1302 b = (Mat_SeqAIJ*)(*fact)->data; 1303 b->free_a = PETSC_TRUE; 1304 b->free_ij = PETSC_TRUE; 1305 b->singlemalloc = PETSC_FALSE; 1306 len = (bi[n] )*sizeof(PetscScalar); 1307 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1308 b->j = bj; 1309 b->i = bi; 1310 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1311 b->diag = bdiag; 1312 b->ilen = 0; 1313 b->imax = 0; 1314 b->row = isrow; 1315 b->col = iscol; 1316 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1317 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1318 b->icol = isicol; 1319 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1320 /* In b structure: Free imax, ilen, old a, old j. 1321 Allocate bdiag, solve_work, new a, new j */ 1322 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1323 b->maxnz = b->nz = bi[n] ; 1324 (*fact)->factor = MAT_FACTOR_LU; 1325 (*fact)->info.factor_mallocs = reallocs; 1326 (*fact)->info.fill_ratio_given = f; 1327 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1328 1329 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1330 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1331 1332 PetscFunctionReturn(0); 1333 } 1334 1335 #include "src/mat/impls/sbaij/seq/sbaij.h" 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1338 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1339 { 1340 Mat C = *B; 1341 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1342 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1343 IS ip=b->row,iip = b->icol; 1344 PetscErrorCode ierr; 1345 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1346 PetscInt *ai=a->i,*aj=a->j; 1347 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1348 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1349 PetscReal zeropivot,rs,shiftnz; 1350 PetscReal shiftpd; 1351 ChShift_Ctx sctx; 1352 PetscInt newshift; 1353 1354 PetscFunctionBegin; 1355 1356 shiftnz = info->shiftnz; 1357 shiftpd = info->shiftpd; 1358 zeropivot = info->zeropivot; 1359 1360 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1361 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1362 1363 /* initialization */ 1364 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1365 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1366 jl = il + mbs; 1367 rtmp = (MatScalar*)(jl + mbs); 1368 1369 sctx.shift_amount = 0; 1370 sctx.nshift = 0; 1371 do { 1372 sctx.chshift = PETSC_FALSE; 1373 for (i=0; i<mbs; i++) { 1374 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1375 } 1376 1377 for (k = 0; k<mbs; k++){ 1378 bval = ba + bi[k]; 1379 /* initialize k-th row by the perm[k]-th row of A */ 1380 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1381 for (j = jmin; j < jmax; j++){ 1382 col = riip[aj[j]]; 1383 if (col >= k){ /* only take upper triangular entry */ 1384 rtmp[col] = aa[j]; 1385 *bval++ = 0.0; /* for in-place factorization */ 1386 } 1387 } 1388 /* shift the diagonal of the matrix */ 1389 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1390 1391 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1392 dk = rtmp[k]; 1393 i = jl[k]; /* first row to be added to k_th row */ 1394 1395 while (i < k){ 1396 nexti = jl[i]; /* next row to be added to k_th row */ 1397 1398 /* compute multiplier, update diag(k) and U(i,k) */ 1399 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1400 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1401 dk += uikdi*ba[ili]; 1402 ba[ili] = uikdi; /* -U(i,k) */ 1403 1404 /* add multiple of row i to k-th row */ 1405 jmin = ili + 1; jmax = bi[i+1]; 1406 if (jmin < jmax){ 1407 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1408 /* update il and jl for row i */ 1409 il[i] = jmin; 1410 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1411 } 1412 i = nexti; 1413 } 1414 1415 /* shift the diagonals when zero pivot is detected */ 1416 /* compute rs=sum of abs(off-diagonal) */ 1417 rs = 0.0; 1418 jmin = bi[k]+1; 1419 nz = bi[k+1] - jmin; 1420 bcol = bj + jmin; 1421 while (nz--){ 1422 rs += PetscAbsScalar(rtmp[*bcol]); 1423 bcol++; 1424 } 1425 1426 sctx.rs = rs; 1427 sctx.pv = dk; 1428 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1429 1430 if (newshift == 1) { 1431 if (!sctx.shift_amount) { 1432 sctx.shift_amount = 1e-5; 1433 } 1434 break; 1435 } 1436 1437 /* copy data into U(k,:) */ 1438 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1439 jmin = bi[k]+1; jmax = bi[k+1]; 1440 if (jmin < jmax) { 1441 for (j=jmin; j<jmax; j++){ 1442 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1443 } 1444 /* add the k-th row into il and jl */ 1445 il[k] = jmin; 1446 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1447 } 1448 } 1449 } while (sctx.chshift); 1450 ierr = PetscFree(il);CHKERRQ(ierr); 1451 1452 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1453 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1454 C->factor = MAT_FACTOR_CHOLESKY; 1455 C->assembled = PETSC_TRUE; 1456 C->preallocated = PETSC_TRUE; 1457 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1458 if (sctx.nshift){ 1459 if (shiftnz) { 1460 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1461 } else if (shiftpd) { 1462 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1463 } 1464 } 1465 PetscFunctionReturn(0); 1466 } 1467 1468 #undef __FUNCT__ 1469 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1470 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1471 { 1472 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1473 Mat_SeqSBAIJ *b; 1474 PetscErrorCode ierr; 1475 PetscTruth perm_identity,missing; 1476 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1477 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1478 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1479 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1480 PetscReal fill=info->fill,levels=info->levels; 1481 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1482 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1483 PetscBT lnkbt; 1484 IS iperm; 1485 1486 PetscFunctionBegin; 1487 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1488 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1489 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1490 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1491 1492 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1493 ui[0] = 0; 1494 1495 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1496 if (!levels && perm_identity) { 1497 1498 for (i=0; i<am; i++) { 1499 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1500 } 1501 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1502 cols = uj; 1503 for (i=0; i<am; i++) { 1504 aj = a->j + a->diag[i]; 1505 ncols = ui[i+1] - ui[i]; 1506 for (j=0; j<ncols; j++) *cols++ = *aj++; 1507 } 1508 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1509 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1510 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1511 1512 /* initialization */ 1513 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1514 1515 /* jl: linked list for storing indices of the pivot rows 1516 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1517 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1518 il = jl + am; 1519 uj_ptr = (PetscInt**)(il + am); 1520 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1521 for (i=0; i<am; i++){ 1522 jl[i] = am; il[i] = 0; 1523 } 1524 1525 /* create and initialize a linked list for storing column indices of the active row k */ 1526 nlnk = am + 1; 1527 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1528 1529 /* initial FreeSpace size is fill*(ai[am]+1) */ 1530 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1531 current_space = free_space; 1532 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1533 current_space_lvl = free_space_lvl; 1534 1535 for (k=0; k<am; k++){ /* for each active row k */ 1536 /* initialize lnk by the column indices of row rip[k] of A */ 1537 nzk = 0; 1538 ncols = ai[rip[k]+1] - ai[rip[k]]; 1539 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1540 ncols_upper = 0; 1541 for (j=0; j<ncols; j++){ 1542 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1543 if (riip[i] >= k){ /* only take upper triangular entry */ 1544 ajtmp[ncols_upper] = i; 1545 ncols_upper++; 1546 } 1547 } 1548 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1549 nzk += nlnk; 1550 1551 /* update lnk by computing fill-in for each pivot row to be merged in */ 1552 prow = jl[k]; /* 1st pivot row */ 1553 1554 while (prow < k){ 1555 nextprow = jl[prow]; 1556 1557 /* merge prow into k-th row */ 1558 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1559 jmax = ui[prow+1]; 1560 ncols = jmax-jmin; 1561 i = jmin - ui[prow]; 1562 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1563 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1564 j = *(uj - 1); 1565 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1566 nzk += nlnk; 1567 1568 /* update il and jl for prow */ 1569 if (jmin < jmax){ 1570 il[prow] = jmin; 1571 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1572 } 1573 prow = nextprow; 1574 } 1575 1576 /* if free space is not available, make more free space */ 1577 if (current_space->local_remaining<nzk) { 1578 i = am - k + 1; /* num of unfactored rows */ 1579 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1580 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1581 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1582 reallocs++; 1583 } 1584 1585 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1586 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1587 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1588 1589 /* add the k-th row into il and jl */ 1590 if (nzk > 1){ 1591 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1592 jl[k] = jl[i]; jl[i] = k; 1593 il[k] = ui[k] + 1; 1594 } 1595 uj_ptr[k] = current_space->array; 1596 uj_lvl_ptr[k] = current_space_lvl->array; 1597 1598 current_space->array += nzk; 1599 current_space->local_used += nzk; 1600 current_space->local_remaining -= nzk; 1601 1602 current_space_lvl->array += nzk; 1603 current_space_lvl->local_used += nzk; 1604 current_space_lvl->local_remaining -= nzk; 1605 1606 ui[k+1] = ui[k] + nzk; 1607 } 1608 1609 #if defined(PETSC_USE_INFO) 1610 if (ai[am] != 0) { 1611 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1612 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1613 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1614 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1615 } else { 1616 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1617 } 1618 #endif 1619 1620 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1621 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1622 ierr = PetscFree(jl);CHKERRQ(ierr); 1623 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1624 1625 /* destroy list of free space and other temporary array(s) */ 1626 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1627 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1628 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1629 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1630 1631 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1632 1633 /* put together the new matrix in MATSEQSBAIJ format */ 1634 1635 b = (Mat_SeqSBAIJ*)(*fact)->data; 1636 b->singlemalloc = PETSC_FALSE; 1637 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1638 b->j = uj; 1639 b->i = ui; 1640 b->diag = 0; 1641 b->ilen = 0; 1642 b->imax = 0; 1643 b->row = perm; 1644 b->col = perm; 1645 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1646 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1647 b->icol = iperm; 1648 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1649 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1650 ierr = PetscLogObjectMemory((*fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1651 b->maxnz = b->nz = ui[am]; 1652 b->free_a = PETSC_TRUE; 1653 b->free_ij = PETSC_TRUE; 1654 1655 (*fact)->factor = MAT_FACTOR_CHOLESKY; 1656 (*fact)->info.factor_mallocs = reallocs; 1657 (*fact)->info.fill_ratio_given = fill; 1658 if (ai[am] != 0) { 1659 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1660 } else { 1661 (*fact)->info.fill_ratio_needed = 0.0; 1662 } 1663 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1664 if (perm_identity){ 1665 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1666 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1667 } 1668 PetscFunctionReturn(0); 1669 } 1670 1671 #undef __FUNCT__ 1672 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1673 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1674 { 1675 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1676 Mat_SeqSBAIJ *b; 1677 PetscErrorCode ierr; 1678 PetscTruth perm_identity; 1679 PetscReal fill = info->fill; 1680 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1681 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1682 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1683 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1684 PetscBT lnkbt; 1685 IS iperm; 1686 1687 PetscFunctionBegin; 1688 /* check whether perm is the identity mapping */ 1689 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1690 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1691 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1692 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1693 1694 /* initialization */ 1695 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1696 ui[0] = 0; 1697 1698 /* jl: linked list for storing indices of the pivot rows 1699 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1700 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1701 il = jl + am; 1702 cols = il + am; 1703 ui_ptr = (PetscInt**)(cols + am); 1704 for (i=0; i<am; i++){ 1705 jl[i] = am; il[i] = 0; 1706 } 1707 1708 /* create and initialize a linked list for storing column indices of the active row k */ 1709 nlnk = am + 1; 1710 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1711 1712 /* initial FreeSpace size is fill*(ai[am]+1) */ 1713 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1714 current_space = free_space; 1715 1716 for (k=0; k<am; k++){ /* for each active row k */ 1717 /* initialize lnk by the column indices of row rip[k] of A */ 1718 nzk = 0; 1719 ncols = ai[rip[k]+1] - ai[rip[k]]; 1720 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1721 ncols_upper = 0; 1722 for (j=0; j<ncols; j++){ 1723 i = riip[*(aj + ai[rip[k]] + j)]; 1724 if (i >= k){ /* only take upper triangular entry */ 1725 cols[ncols_upper] = i; 1726 ncols_upper++; 1727 } 1728 } 1729 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1730 nzk += nlnk; 1731 1732 /* update lnk by computing fill-in for each pivot row to be merged in */ 1733 prow = jl[k]; /* 1st pivot row */ 1734 1735 while (prow < k){ 1736 nextprow = jl[prow]; 1737 /* merge prow into k-th row */ 1738 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1739 jmax = ui[prow+1]; 1740 ncols = jmax-jmin; 1741 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1742 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1743 nzk += nlnk; 1744 1745 /* update il and jl for prow */ 1746 if (jmin < jmax){ 1747 il[prow] = jmin; 1748 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1749 } 1750 prow = nextprow; 1751 } 1752 1753 /* if free space is not available, make more free space */ 1754 if (current_space->local_remaining<nzk) { 1755 i = am - k + 1; /* num of unfactored rows */ 1756 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1757 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1758 reallocs++; 1759 } 1760 1761 /* copy data into free space, then initialize lnk */ 1762 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1763 1764 /* add the k-th row into il and jl */ 1765 if (nzk-1 > 0){ 1766 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1767 jl[k] = jl[i]; jl[i] = k; 1768 il[k] = ui[k] + 1; 1769 } 1770 ui_ptr[k] = current_space->array; 1771 current_space->array += nzk; 1772 current_space->local_used += nzk; 1773 current_space->local_remaining -= nzk; 1774 1775 ui[k+1] = ui[k] + nzk; 1776 } 1777 1778 #if defined(PETSC_USE_INFO) 1779 if (ai[am] != 0) { 1780 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1781 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1782 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1783 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1784 } else { 1785 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1786 } 1787 #endif 1788 1789 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1790 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1791 ierr = PetscFree(jl);CHKERRQ(ierr); 1792 1793 /* destroy list of free space and other temporary array(s) */ 1794 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1795 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1796 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1797 1798 /* put together the new matrix in MATSEQSBAIJ format */ 1799 1800 b = (Mat_SeqSBAIJ*)(*fact)->data; 1801 b->singlemalloc = PETSC_FALSE; 1802 b->free_a = PETSC_TRUE; 1803 b->free_ij = PETSC_TRUE; 1804 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1805 b->j = uj; 1806 b->i = ui; 1807 b->diag = 0; 1808 b->ilen = 0; 1809 b->imax = 0; 1810 b->row = perm; 1811 b->col = perm; 1812 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1813 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1814 b->icol = iperm; 1815 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1816 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1817 ierr = PetscLogObjectMemory(*fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1818 b->maxnz = b->nz = ui[am]; 1819 1820 (*fact)->factor = MAT_FACTOR_CHOLESKY; 1821 (*fact)->info.factor_mallocs = reallocs; 1822 (*fact)->info.fill_ratio_given = fill; 1823 if (ai[am] != 0) { 1824 (*fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1825 } else { 1826 (*fact)->info.fill_ratio_needed = 0.0; 1827 } 1828 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1829 if (perm_identity){ 1830 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1831 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1832 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1833 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1834 } else { 1835 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1836 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1837 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1838 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842