1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ_saad" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ_saad(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ; 252 else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 253 (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ; 254 (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 255 (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 267 { 268 PetscFunctionBegin; 269 *flg = PETSC_TRUE; 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 278 { 279 PetscInt n = A->rmap->n; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 284 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 285 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 286 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 287 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 288 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 289 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 290 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 291 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 292 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 293 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 294 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 295 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 296 (*B)->factor = ftype; 297 PetscFunctionReturn(0); 298 } 299 EXTERN_C_END 300 301 #undef __FUNCT__ 302 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 303 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 304 { 305 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 306 IS isicol; 307 PetscErrorCode ierr; 308 const PetscInt *r,*ic; 309 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 310 PetscInt *bi,*bj,*ajtmp; 311 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 312 PetscReal f; 313 PetscInt nlnk,*lnk,k,**bi_ptr; 314 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 315 PetscBT lnkbt; 316 317 PetscFunctionBegin; 318 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 319 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 320 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 321 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 322 323 /* get new row pointers */ 324 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 325 bi[0] = 0; 326 327 /* bdiag is location of diagonal in factor */ 328 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 329 bdiag[0] = 0; 330 331 /* linked list for storing column indices of the active row */ 332 nlnk = n + 1; 333 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 334 335 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 336 337 /* initial FreeSpace size is f*(ai[n]+1) */ 338 f = info->fill; 339 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 340 current_space = free_space; 341 342 for (i=0; i<n; i++) { 343 /* copy previous fill into linked list */ 344 nzi = 0; 345 nnz = ai[r[i]+1] - ai[r[i]]; 346 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 347 ajtmp = aj + ai[r[i]]; 348 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 349 nzi += nlnk; 350 351 /* add pivot rows into linked list */ 352 row = lnk[n]; 353 while (row < i) { 354 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 355 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 356 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 357 nzi += nlnk; 358 row = lnk[row]; 359 } 360 bi[i+1] = bi[i] + nzi; 361 im[i] = nzi; 362 363 /* mark bdiag */ 364 nzbd = 0; 365 nnz = nzi; 366 k = lnk[n]; 367 while (nnz-- && k < i){ 368 nzbd++; 369 k = lnk[k]; 370 } 371 bdiag[i] = bi[i] + nzbd; 372 373 /* if free space is not available, make more free space */ 374 if (current_space->local_remaining<nzi) { 375 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 376 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 377 reallocs++; 378 } 379 380 /* copy data into free space, then initialize lnk */ 381 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 382 bi_ptr[i] = current_space->array; 383 current_space->array += nzi; 384 current_space->local_used += nzi; 385 current_space->local_remaining -= nzi; 386 } 387 #if defined(PETSC_USE_INFO) 388 if (ai[n] != 0) { 389 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 390 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 391 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 392 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 393 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 394 } else { 395 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 396 } 397 #endif 398 399 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 400 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 401 402 /* destroy list of free space and other temporary array(s) */ 403 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 404 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 405 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 406 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 407 408 /* put together the new matrix */ 409 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 410 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 411 b = (Mat_SeqAIJ*)(B)->data; 412 b->free_a = PETSC_TRUE; 413 b->free_ij = PETSC_TRUE; 414 b->singlemalloc = PETSC_FALSE; 415 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 416 b->j = bj; 417 b->i = bi; 418 b->diag = bdiag; 419 b->ilen = 0; 420 b->imax = 0; 421 b->row = isrow; 422 b->col = iscol; 423 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 424 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 425 b->icol = isicol; 426 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 427 428 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 429 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 430 b->maxnz = b->nz = bi[n] ; 431 432 (B)->factor = MAT_FACTOR_LU; 433 (B)->info.factor_mallocs = reallocs; 434 (B)->info.fill_ratio_given = f; 435 436 if (ai[n]) { 437 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 438 } else { 439 (B)->info.fill_ratio_needed = 0.0; 440 } 441 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 442 (B)->ops->solve = MatSolve_SeqAIJ; 443 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 444 /* switch to inodes if appropriate */ 445 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 446 PetscFunctionReturn(0); 447 } 448 449 /* 450 Trouble in factorization, should we dump the original matrix? 451 */ 452 #undef __FUNCT__ 453 #define __FUNCT__ "MatFactorDumpMatrix" 454 PetscErrorCode MatFactorDumpMatrix(Mat A) 455 { 456 PetscErrorCode ierr; 457 PetscTruth flg = PETSC_FALSE; 458 459 PetscFunctionBegin; 460 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 461 if (flg) { 462 PetscViewer viewer; 463 char filename[PETSC_MAX_PATH_LEN]; 464 465 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 466 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 467 ierr = MatView(A,viewer);CHKERRQ(ierr); 468 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 469 } 470 PetscFunctionReturn(0); 471 } 472 473 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 474 475 /* ----------------------------------------------------------- */ 476 #undef __FUNCT__ 477 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 478 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 479 { 480 Mat C=B; 481 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 482 IS isrow = b->row,isicol = b->icol; 483 PetscErrorCode ierr; 484 const PetscInt *r,*ic,*ics; 485 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 486 PetscInt *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj; 487 MatScalar *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a; 488 PetscReal rs = 0.0; 489 LUShift_Ctx sctx; 490 PetscInt newshift,*ddiag; 491 492 PetscFunctionBegin; 493 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 494 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 495 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 496 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 497 ics = ic; 498 499 sctx.shift_top = 0; 500 sctx.nshift_max = 0; 501 sctx.shift_lo = 0; 502 sctx.shift_hi = 0; 503 sctx.shift_fraction = 0; 504 505 /* if both shift schemes are chosen by user, only use info->shiftpd */ 506 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 507 ddiag = a->diag; 508 sctx.shift_top = info->zeropivot; 509 for (i=0; i<n; i++) { 510 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 511 d = (aa)[ddiag[i]]; 512 rs = -PetscAbsScalar(d) - PetscRealPart(d); 513 v = aa+ai[i]; 514 nz = ai[i+1] - ai[i]; 515 for (j=0; j<nz; j++) 516 rs += PetscAbsScalar(v[j]); 517 if (rs>sctx.shift_top) sctx.shift_top = rs; 518 } 519 sctx.shift_top *= 1.1; 520 sctx.nshift_max = 5; 521 sctx.shift_lo = 0.; 522 sctx.shift_hi = 1.; 523 } 524 525 sctx.shift_amount = 0.0; 526 sctx.nshift = 0; 527 do { 528 sctx.lushift = PETSC_FALSE; 529 for (i=0; i<n; i++){ 530 nz = bi[i+1] - bi[i]; 531 bjtmp = bj + bi[i]; 532 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 533 534 /* load in initial (unfactored row) */ 535 nz = ai[r[i]+1] - ai[r[i]]; 536 ajtmp = aj + ai[r[i]]; 537 v = aa + ai[r[i]]; 538 for (j=0; j<nz; j++) { 539 rtmp[ics[ajtmp[j]]] = v[j]; 540 } 541 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 542 543 row = *bjtmp++; 544 while (row < i) { 545 pc = rtmp + row; 546 if (*pc != 0.0) { 547 pv = b->a + diag_offset[row]; 548 pj = b->j + diag_offset[row] + 1; 549 multiplier = *pc / *pv++; 550 *pc = multiplier; 551 nz = bi[row+1] - diag_offset[row] - 1; 552 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 553 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 554 } 555 row = *bjtmp++; 556 } 557 /* finished row so stick it into b->a */ 558 pv = b->a + bi[i] ; 559 pj = b->j + bi[i] ; 560 nz = bi[i+1] - bi[i]; 561 diag = diag_offset[i] - bi[i]; 562 rs = -PetscAbsScalar(pv[diag]); 563 for (j=0; j<nz; j++) { 564 pv[j] = rtmp[pj[j]]; 565 rs += PetscAbsScalar(pv[j]); 566 } 567 568 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 569 sctx.rs = rs; 570 sctx.pv = pv[diag]; 571 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 572 if (newshift == 1) break; 573 } 574 575 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 576 /* 577 * if no shift in this attempt & shifting & started shifting & can refine, 578 * then try lower shift 579 */ 580 sctx.shift_hi = sctx.shift_fraction; 581 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 582 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 583 sctx.lushift = PETSC_TRUE; 584 sctx.nshift++; 585 } 586 } while (sctx.lushift); 587 588 /* invert diagonal entries for simplier triangular solves */ 589 for (i=0; i<n; i++) { 590 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 591 } 592 ierr = PetscFree(rtmp);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 594 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 595 if (b->inode.use) { 596 C->ops->solve = MatSolve_Inode; 597 } else { 598 PetscTruth row_identity, col_identity; 599 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 600 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 601 if (row_identity && col_identity) { 602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603 } else { 604 C->ops->solve = MatSolve_SeqAIJ; 605 } 606 } 607 C->ops->solveadd = MatSolveAdd_SeqAIJ; 608 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 609 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 610 C->ops->matsolve = MatMatSolve_SeqAIJ; 611 C->assembled = PETSC_TRUE; 612 C->preallocated = PETSC_TRUE; 613 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 614 if (sctx.nshift){ 615 if (info->shiftpd) { 616 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 617 } else if (info->shiftnz) { 618 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 619 } 620 } 621 PetscFunctionReturn(0); 622 } 623 624 /* 625 This routine implements inplace ILU(0) with row or/and column permutations. 626 Input: 627 A - original matrix 628 Output; 629 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 630 a->j (col index) is permuted by the inverse of colperm, then sorted 631 a->a reordered accordingly with a->j 632 a->diag (ptr to diagonal elements) is updated. 633 */ 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 636 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 637 { 638 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 639 IS isrow = a->row,isicol = a->icol; 640 PetscErrorCode ierr; 641 const PetscInt *r,*ic,*ics; 642 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 643 PetscInt *ajtmp,nz,row; 644 PetscInt *diag = a->diag,nbdiag,*pj; 645 PetscScalar *rtmp,*pc,multiplier,d; 646 MatScalar *v,*pv; 647 PetscReal rs; 648 LUShift_Ctx sctx; 649 PetscInt newshift; 650 651 PetscFunctionBegin; 652 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 653 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 654 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 655 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 656 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 657 ics = ic; 658 659 sctx.shift_top = 0; 660 sctx.nshift_max = 0; 661 sctx.shift_lo = 0; 662 sctx.shift_hi = 0; 663 sctx.shift_fraction = 0; 664 665 /* if both shift schemes are chosen by user, only use info->shiftpd */ 666 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 667 sctx.shift_top = 0; 668 for (i=0; i<n; i++) { 669 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 670 d = (a->a)[diag[i]]; 671 rs = -PetscAbsScalar(d) - PetscRealPart(d); 672 v = a->a+ai[i]; 673 nz = ai[i+1] - ai[i]; 674 for (j=0; j<nz; j++) 675 rs += PetscAbsScalar(v[j]); 676 if (rs>sctx.shift_top) sctx.shift_top = rs; 677 } 678 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 679 sctx.shift_top *= 1.1; 680 sctx.nshift_max = 5; 681 sctx.shift_lo = 0.; 682 sctx.shift_hi = 1.; 683 } 684 685 sctx.shift_amount = 0; 686 sctx.nshift = 0; 687 do { 688 sctx.lushift = PETSC_FALSE; 689 for (i=0; i<n; i++){ 690 /* load in initial unfactored row */ 691 nz = ai[r[i]+1] - ai[r[i]]; 692 ajtmp = aj + ai[r[i]]; 693 v = a->a + ai[r[i]]; 694 /* sort permuted ajtmp and values v accordingly */ 695 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 696 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 697 698 diag[r[i]] = ai[r[i]]; 699 for (j=0; j<nz; j++) { 700 rtmp[ajtmp[j]] = v[j]; 701 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 702 } 703 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 704 705 row = *ajtmp++; 706 while (row < i) { 707 pc = rtmp + row; 708 if (*pc != 0.0) { 709 pv = a->a + diag[r[row]]; 710 pj = aj + diag[r[row]] + 1; 711 712 multiplier = *pc / *pv++; 713 *pc = multiplier; 714 nz = ai[r[row]+1] - diag[r[row]] - 1; 715 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 716 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 717 } 718 row = *ajtmp++; 719 } 720 /* finished row so overwrite it onto a->a */ 721 pv = a->a + ai[r[i]] ; 722 pj = aj + ai[r[i]] ; 723 nz = ai[r[i]+1] - ai[r[i]]; 724 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 725 726 rs = 0.0; 727 for (j=0; j<nz; j++) { 728 pv[j] = rtmp[pj[j]]; 729 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 730 } 731 732 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 733 sctx.rs = rs; 734 sctx.pv = pv[nbdiag]; 735 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 736 if (newshift == 1) break; 737 } 738 739 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 740 /* 741 * if no shift in this attempt & shifting & started shifting & can refine, 742 * then try lower shift 743 */ 744 sctx.shift_hi = sctx.shift_fraction; 745 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 746 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 747 sctx.lushift = PETSC_TRUE; 748 sctx.nshift++; 749 } 750 } while (sctx.lushift); 751 752 /* invert diagonal entries for simplier triangular solves */ 753 for (i=0; i<n; i++) { 754 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 755 } 756 757 ierr = PetscFree(rtmp);CHKERRQ(ierr); 758 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 759 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 760 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 761 A->ops->solveadd = MatSolveAdd_SeqAIJ; 762 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 763 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 764 A->assembled = PETSC_TRUE; 765 A->preallocated = PETSC_TRUE; 766 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 767 if (sctx.nshift){ 768 if (info->shiftpd) { 769 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 770 } else if (info->shiftnz) { 771 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 772 } 773 } 774 PetscFunctionReturn(0); 775 } 776 777 /* ----------------------------------------------------------- */ 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatLUFactor_SeqAIJ" 780 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 781 { 782 PetscErrorCode ierr; 783 Mat C; 784 785 PetscFunctionBegin; 786 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 787 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 788 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 789 A->ops->solve = C->ops->solve; 790 A->ops->solvetranspose = C->ops->solvetranspose; 791 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 792 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 793 PetscFunctionReturn(0); 794 } 795 /* ----------------------------------------------------------- */ 796 797 #undef __FUNCT__ 798 #define __FUNCT__ "MatSolve_SeqAIJ" 799 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 800 { 801 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 802 IS iscol = a->col,isrow = a->row; 803 PetscErrorCode ierr; 804 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 805 PetscInt nz; 806 const PetscInt *rout,*cout,*r,*c; 807 PetscScalar *x,*tmp,*tmps,sum; 808 const PetscScalar *b; 809 const MatScalar *aa = a->a,*v; 810 811 PetscFunctionBegin; 812 if (!n) PetscFunctionReturn(0); 813 814 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 815 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 816 tmp = a->solve_work; 817 818 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 819 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 820 821 /* forward solve the lower triangular */ 822 tmp[0] = b[*r++]; 823 tmps = tmp; 824 for (i=1; i<n; i++) { 825 v = aa + ai[i] ; 826 vi = aj + ai[i] ; 827 nz = a->diag[i] - ai[i]; 828 sum = b[*r++]; 829 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 830 tmp[i] = sum; 831 } 832 833 /* backward solve the upper triangular */ 834 for (i=n-1; i>=0; i--){ 835 v = aa + a->diag[i] + 1; 836 vi = aj + a->diag[i] + 1; 837 nz = ai[i+1] - a->diag[i] - 1; 838 sum = tmp[i]; 839 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 840 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 841 } 842 843 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 844 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 845 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 846 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 847 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 #undef __FUNCT__ 852 #define __FUNCT__ "MatMatSolve_SeqAIJ" 853 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 854 { 855 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 856 IS iscol = a->col,isrow = a->row; 857 PetscErrorCode ierr; 858 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 859 PetscInt nz,neq; 860 const PetscInt *rout,*cout,*r,*c; 861 PetscScalar *x,*b,*tmp,*tmps,sum; 862 const MatScalar *aa = a->a,*v; 863 PetscTruth bisdense,xisdense; 864 865 PetscFunctionBegin; 866 if (!n) PetscFunctionReturn(0); 867 868 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 869 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 870 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 871 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 872 873 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 874 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 875 876 tmp = a->solve_work; 877 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 878 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 879 880 for (neq=0; neq<B->cmap->n; neq++){ 881 /* forward solve the lower triangular */ 882 tmp[0] = b[r[0]]; 883 tmps = tmp; 884 for (i=1; i<n; i++) { 885 v = aa + ai[i] ; 886 vi = aj + ai[i] ; 887 nz = a->diag[i] - ai[i]; 888 sum = b[r[i]]; 889 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 890 tmp[i] = sum; 891 } 892 /* backward solve the upper triangular */ 893 for (i=n-1; i>=0; i--){ 894 v = aa + a->diag[i] + 1; 895 vi = aj + a->diag[i] + 1; 896 nz = ai[i+1] - a->diag[i] - 1; 897 sum = tmp[i]; 898 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 899 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 900 } 901 902 b += n; 903 x += n; 904 } 905 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 906 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 907 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 908 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 909 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 910 PetscFunctionReturn(0); 911 } 912 913 #undef __FUNCT__ 914 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 915 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 916 { 917 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 918 IS iscol = a->col,isrow = a->row; 919 PetscErrorCode ierr; 920 const PetscInt *r,*c,*rout,*cout; 921 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 922 PetscInt nz,row; 923 PetscScalar *x,*b,*tmp,*tmps,sum; 924 const MatScalar *aa = a->a,*v; 925 926 PetscFunctionBegin; 927 if (!n) PetscFunctionReturn(0); 928 929 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 930 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 931 tmp = a->solve_work; 932 933 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 934 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 935 936 /* forward solve the lower triangular */ 937 tmp[0] = b[*r++]; 938 tmps = tmp; 939 for (row=1; row<n; row++) { 940 i = rout[row]; /* permuted row */ 941 v = aa + ai[i] ; 942 vi = aj + ai[i] ; 943 nz = a->diag[i] - ai[i]; 944 sum = b[*r++]; 945 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 946 tmp[row] = sum; 947 } 948 949 /* backward solve the upper triangular */ 950 for (row=n-1; row>=0; row--){ 951 i = rout[row]; /* permuted row */ 952 v = aa + a->diag[i] + 1; 953 vi = aj + a->diag[i] + 1; 954 nz = ai[i+1] - a->diag[i] - 1; 955 sum = tmp[row]; 956 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 957 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 958 } 959 960 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 961 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 962 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 963 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 964 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 /* ----------------------------------------------------------- */ 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 971 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 972 { 973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 974 PetscErrorCode ierr; 975 PetscInt n = A->rmap->n; 976 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 977 PetscScalar *x; 978 const PetscScalar *b; 979 const MatScalar *aa = a->a; 980 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 981 PetscInt adiag_i,i,nz,ai_i; 982 const MatScalar *v; 983 PetscScalar sum; 984 #endif 985 986 PetscFunctionBegin; 987 if (!n) PetscFunctionReturn(0); 988 989 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 990 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 991 992 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 993 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 994 #else 995 /* forward solve the lower triangular */ 996 x[0] = b[0]; 997 for (i=1; i<n; i++) { 998 ai_i = ai[i]; 999 v = aa + ai_i; 1000 vi = aj + ai_i; 1001 nz = adiag[i] - ai_i; 1002 sum = b[i]; 1003 while (nz--) sum -= *v++ * x[*vi++]; 1004 x[i] = sum; 1005 } 1006 1007 /* backward solve the upper triangular */ 1008 for (i=n-1; i>=0; i--){ 1009 adiag_i = adiag[i]; 1010 v = aa + adiag_i + 1; 1011 vi = aj + adiag_i + 1; 1012 nz = ai[i+1] - adiag_i - 1; 1013 sum = x[i]; 1014 while (nz--) sum -= *v++ * x[*vi++]; 1015 x[i] = sum*aa[adiag_i]; 1016 } 1017 #endif 1018 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1019 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1020 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1026 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1027 { 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1029 IS iscol = a->col,isrow = a->row; 1030 PetscErrorCode ierr; 1031 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1032 PetscInt nz; 1033 const PetscInt *rout,*cout,*r,*c; 1034 PetscScalar *x,*b,*tmp,sum; 1035 const MatScalar *aa = a->a,*v; 1036 1037 PetscFunctionBegin; 1038 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1039 1040 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1041 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1042 tmp = a->solve_work; 1043 1044 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1045 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1046 1047 /* forward solve the lower triangular */ 1048 tmp[0] = b[*r++]; 1049 for (i=1; i<n; i++) { 1050 v = aa + ai[i] ; 1051 vi = aj + ai[i] ; 1052 nz = a->diag[i] - ai[i]; 1053 sum = b[*r++]; 1054 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1055 tmp[i] = sum; 1056 } 1057 1058 /* backward solve the upper triangular */ 1059 for (i=n-1; i>=0; i--){ 1060 v = aa + a->diag[i] + 1; 1061 vi = aj + a->diag[i] + 1; 1062 nz = ai[i+1] - a->diag[i] - 1; 1063 sum = tmp[i]; 1064 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1065 tmp[i] = sum*aa[a->diag[i]]; 1066 x[*c--] += tmp[i]; 1067 } 1068 1069 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1070 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1071 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1072 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1073 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1074 1075 PetscFunctionReturn(0); 1076 } 1077 /* -------------------------------------------------------------------*/ 1078 #undef __FUNCT__ 1079 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1080 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1081 { 1082 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1083 IS iscol = a->col,isrow = a->row; 1084 PetscErrorCode ierr; 1085 const PetscInt *rout,*cout,*r,*c; 1086 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1087 PetscInt nz,*diag = a->diag; 1088 PetscScalar *x,*b,*tmp,s1; 1089 const MatScalar *aa = a->a,*v; 1090 1091 PetscFunctionBegin; 1092 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1093 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1094 tmp = a->solve_work; 1095 1096 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1097 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1098 1099 /* copy the b into temp work space according to permutation */ 1100 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1101 1102 /* forward solve the U^T */ 1103 for (i=0; i<n; i++) { 1104 v = aa + diag[i] ; 1105 vi = aj + diag[i] + 1; 1106 nz = ai[i+1] - diag[i] - 1; 1107 s1 = tmp[i]; 1108 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1109 while (nz--) { 1110 tmp[*vi++ ] -= (*v++)*s1; 1111 } 1112 tmp[i] = s1; 1113 } 1114 1115 /* backward solve the L^T */ 1116 for (i=n-1; i>=0; i--){ 1117 v = aa + diag[i] - 1 ; 1118 vi = aj + diag[i] - 1 ; 1119 nz = diag[i] - ai[i]; 1120 s1 = tmp[i]; 1121 while (nz--) { 1122 tmp[*vi-- ] -= (*v--)*s1; 1123 } 1124 } 1125 1126 /* copy tmp into x according to permutation */ 1127 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1128 1129 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1130 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1133 1134 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1140 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1141 { 1142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1143 IS iscol = a->col,isrow = a->row; 1144 PetscErrorCode ierr; 1145 const PetscInt *r,*c,*rout,*cout; 1146 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1147 PetscInt nz,*diag = a->diag; 1148 PetscScalar *x,*b,*tmp; 1149 const MatScalar *aa = a->a,*v; 1150 1151 PetscFunctionBegin; 1152 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1153 1154 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1155 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1156 tmp = a->solve_work; 1157 1158 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1159 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1160 1161 /* copy the b into temp work space according to permutation */ 1162 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1163 1164 /* forward solve the U^T */ 1165 for (i=0; i<n; i++) { 1166 v = aa + diag[i] ; 1167 vi = aj + diag[i] + 1; 1168 nz = ai[i+1] - diag[i] - 1; 1169 tmp[i] *= *v++; 1170 while (nz--) { 1171 tmp[*vi++ ] -= (*v++)*tmp[i]; 1172 } 1173 } 1174 1175 /* backward solve the L^T */ 1176 for (i=n-1; i>=0; i--){ 1177 v = aa + diag[i] - 1 ; 1178 vi = aj + diag[i] - 1 ; 1179 nz = diag[i] - ai[i]; 1180 while (nz--) { 1181 tmp[*vi-- ] -= (*v--)*tmp[i]; 1182 } 1183 } 1184 1185 /* copy tmp into x according to permutation */ 1186 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1187 1188 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1189 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1190 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1191 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1192 1193 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 /* ----------------------------------------------------------------*/ 1197 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1198 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1199 1200 #undef __FUNCT__ 1201 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1202 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1203 { 1204 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1205 IS isicol; 1206 PetscErrorCode ierr; 1207 const PetscInt *r,*ic; 1208 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1209 PetscInt *bi,*cols,nnz,*cols_lvl; 1210 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1211 PetscInt i,levels,diagonal_fill; 1212 PetscTruth col_identity,row_identity; 1213 PetscReal f; 1214 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1215 PetscBT lnkbt; 1216 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1217 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1218 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1219 PetscTruth missing; 1220 1221 PetscFunctionBegin; 1222 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1223 f = info->fill; 1224 levels = (PetscInt)info->levels; 1225 diagonal_fill = (PetscInt)info->diagonal_fill; 1226 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1227 1228 /* special case that simply copies fill pattern */ 1229 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1230 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1231 if (!levels && row_identity && col_identity) { 1232 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1233 fact->factor = MAT_FACTOR_ILU; 1234 (fact)->info.factor_mallocs = 0; 1235 (fact)->info.fill_ratio_given = info->fill; 1236 (fact)->info.fill_ratio_needed = 1.0; 1237 b = (Mat_SeqAIJ*)(fact)->data; 1238 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1239 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1240 b->row = isrow; 1241 b->col = iscol; 1242 b->icol = isicol; 1243 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1244 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1245 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1246 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1247 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1252 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1253 1254 /* get new row pointers */ 1255 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1256 bi[0] = 0; 1257 /* bdiag is location of diagonal in factor */ 1258 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1259 bdiag[0] = 0; 1260 1261 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1262 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1263 1264 /* create a linked list for storing column indices of the active row */ 1265 nlnk = n + 1; 1266 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1267 1268 /* initial FreeSpace size is f*(ai[n]+1) */ 1269 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1270 current_space = free_space; 1271 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1272 current_space_lvl = free_space_lvl; 1273 1274 for (i=0; i<n; i++) { 1275 nzi = 0; 1276 /* copy current row into linked list */ 1277 nnz = ai[r[i]+1] - ai[r[i]]; 1278 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1279 cols = aj + ai[r[i]]; 1280 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1281 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1282 nzi += nlnk; 1283 1284 /* make sure diagonal entry is included */ 1285 if (diagonal_fill && lnk[i] == -1) { 1286 fm = n; 1287 while (lnk[fm] < i) fm = lnk[fm]; 1288 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1289 lnk[fm] = i; 1290 lnk_lvl[i] = 0; 1291 nzi++; dcount++; 1292 } 1293 1294 /* add pivot rows into the active row */ 1295 nzbd = 0; 1296 prow = lnk[n]; 1297 while (prow < i) { 1298 nnz = bdiag[prow]; 1299 cols = bj_ptr[prow] + nnz + 1; 1300 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1301 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1302 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1303 nzi += nlnk; 1304 prow = lnk[prow]; 1305 nzbd++; 1306 } 1307 bdiag[i] = nzbd; 1308 bi[i+1] = bi[i] + nzi; 1309 1310 /* if free space is not available, make more free space */ 1311 if (current_space->local_remaining<nzi) { 1312 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1313 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1314 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1315 reallocs++; 1316 } 1317 1318 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1319 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1320 bj_ptr[i] = current_space->array; 1321 bjlvl_ptr[i] = current_space_lvl->array; 1322 1323 /* make sure the active row i has diagonal entry */ 1324 if (*(bj_ptr[i]+bdiag[i]) != i) { 1325 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1326 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1327 } 1328 1329 current_space->array += nzi; 1330 current_space->local_used += nzi; 1331 current_space->local_remaining -= nzi; 1332 current_space_lvl->array += nzi; 1333 current_space_lvl->local_used += nzi; 1334 current_space_lvl->local_remaining -= nzi; 1335 } 1336 1337 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1338 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1339 1340 /* destroy list of free space and other temporary arrays */ 1341 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1342 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1343 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1344 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1345 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1346 1347 #if defined(PETSC_USE_INFO) 1348 { 1349 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1350 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1351 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1352 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1353 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1354 if (diagonal_fill) { 1355 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1356 } 1357 } 1358 #endif 1359 1360 /* put together the new matrix */ 1361 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1362 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1363 b = (Mat_SeqAIJ*)(fact)->data; 1364 b->free_a = PETSC_TRUE; 1365 b->free_ij = PETSC_TRUE; 1366 b->singlemalloc = PETSC_FALSE; 1367 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1368 b->j = bj; 1369 b->i = bi; 1370 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1371 b->diag = bdiag; 1372 b->ilen = 0; 1373 b->imax = 0; 1374 b->row = isrow; 1375 b->col = iscol; 1376 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1377 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1378 b->icol = isicol; 1379 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1380 /* In b structure: Free imax, ilen, old a, old j. 1381 Allocate bdiag, solve_work, new a, new j */ 1382 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1383 b->maxnz = b->nz = bi[n] ; 1384 (fact)->info.factor_mallocs = reallocs; 1385 (fact)->info.fill_ratio_given = f; 1386 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1387 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1388 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1389 PetscFunctionReturn(0); 1390 } 1391 1392 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1395 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1396 { 1397 Mat C = B; 1398 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1399 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1400 IS ip=b->row,iip = b->icol; 1401 PetscErrorCode ierr; 1402 const PetscInt *rip,*riip; 1403 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1404 PetscInt *ai=a->i,*aj=a->j; 1405 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1406 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1407 PetscReal zeropivot,rs,shiftnz; 1408 PetscReal shiftpd; 1409 ChShift_Ctx sctx; 1410 PetscInt newshift; 1411 PetscTruth perm_identity; 1412 1413 PetscFunctionBegin; 1414 1415 shiftnz = info->shiftnz; 1416 shiftpd = info->shiftpd; 1417 zeropivot = info->zeropivot; 1418 1419 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1420 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1421 1422 /* initialization */ 1423 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1424 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1425 jl = il + mbs; 1426 rtmp = (MatScalar*)(jl + mbs); 1427 1428 sctx.shift_amount = 0; 1429 sctx.nshift = 0; 1430 do { 1431 sctx.chshift = PETSC_FALSE; 1432 for (i=0; i<mbs; i++) { 1433 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1434 } 1435 1436 for (k = 0; k<mbs; k++){ 1437 bval = ba + bi[k]; 1438 /* initialize k-th row by the perm[k]-th row of A */ 1439 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1440 for (j = jmin; j < jmax; j++){ 1441 col = riip[aj[j]]; 1442 if (col >= k){ /* only take upper triangular entry */ 1443 rtmp[col] = aa[j]; 1444 *bval++ = 0.0; /* for in-place factorization */ 1445 } 1446 } 1447 /* shift the diagonal of the matrix */ 1448 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1449 1450 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1451 dk = rtmp[k]; 1452 i = jl[k]; /* first row to be added to k_th row */ 1453 1454 while (i < k){ 1455 nexti = jl[i]; /* next row to be added to k_th row */ 1456 1457 /* compute multiplier, update diag(k) and U(i,k) */ 1458 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1459 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1460 dk += uikdi*ba[ili]; 1461 ba[ili] = uikdi; /* -U(i,k) */ 1462 1463 /* add multiple of row i to k-th row */ 1464 jmin = ili + 1; jmax = bi[i+1]; 1465 if (jmin < jmax){ 1466 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1467 /* update il and jl for row i */ 1468 il[i] = jmin; 1469 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1470 } 1471 i = nexti; 1472 } 1473 1474 /* shift the diagonals when zero pivot is detected */ 1475 /* compute rs=sum of abs(off-diagonal) */ 1476 rs = 0.0; 1477 jmin = bi[k]+1; 1478 nz = bi[k+1] - jmin; 1479 bcol = bj + jmin; 1480 while (nz--){ 1481 rs += PetscAbsScalar(rtmp[*bcol]); 1482 bcol++; 1483 } 1484 1485 sctx.rs = rs; 1486 sctx.pv = dk; 1487 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1488 1489 if (newshift == 1) { 1490 if (!sctx.shift_amount) { 1491 sctx.shift_amount = 1e-5; 1492 } 1493 break; 1494 } 1495 1496 /* copy data into U(k,:) */ 1497 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1498 jmin = bi[k]+1; jmax = bi[k+1]; 1499 if (jmin < jmax) { 1500 for (j=jmin; j<jmax; j++){ 1501 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1502 } 1503 /* add the k-th row into il and jl */ 1504 il[k] = jmin; 1505 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1506 } 1507 } 1508 } while (sctx.chshift); 1509 ierr = PetscFree(il);CHKERRQ(ierr); 1510 1511 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1512 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1513 1514 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1515 if (perm_identity){ 1516 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1517 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1518 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1519 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1520 } else { 1521 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1522 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1523 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1524 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1525 } 1526 1527 C->assembled = PETSC_TRUE; 1528 C->preallocated = PETSC_TRUE; 1529 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1530 if (sctx.nshift){ 1531 if (shiftnz) { 1532 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1533 } else if (shiftpd) { 1534 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1535 } 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 #undef __FUNCT__ 1541 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1542 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1543 { 1544 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1545 Mat_SeqSBAIJ *b; 1546 PetscErrorCode ierr; 1547 PetscTruth perm_identity,missing; 1548 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1549 const PetscInt *rip,*riip; 1550 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1551 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1552 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1553 PetscReal fill=info->fill,levels=info->levels; 1554 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1555 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1556 PetscBT lnkbt; 1557 IS iperm; 1558 1559 PetscFunctionBegin; 1560 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1561 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1562 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1563 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1564 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1565 1566 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1567 ui[0] = 0; 1568 1569 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1570 if (!levels && perm_identity) { 1571 1572 for (i=0; i<am; i++) { 1573 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1574 } 1575 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1576 cols = uj; 1577 for (i=0; i<am; i++) { 1578 aj = a->j + a->diag[i]; 1579 ncols = ui[i+1] - ui[i]; 1580 for (j=0; j<ncols; j++) *cols++ = *aj++; 1581 } 1582 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1583 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1584 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1585 1586 /* initialization */ 1587 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1588 1589 /* jl: linked list for storing indices of the pivot rows 1590 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1591 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1592 il = jl + am; 1593 uj_ptr = (PetscInt**)(il + am); 1594 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1595 for (i=0; i<am; i++){ 1596 jl[i] = am; il[i] = 0; 1597 } 1598 1599 /* create and initialize a linked list for storing column indices of the active row k */ 1600 nlnk = am + 1; 1601 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1602 1603 /* initial FreeSpace size is fill*(ai[am]+1) */ 1604 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1605 current_space = free_space; 1606 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1607 current_space_lvl = free_space_lvl; 1608 1609 for (k=0; k<am; k++){ /* for each active row k */ 1610 /* initialize lnk by the column indices of row rip[k] of A */ 1611 nzk = 0; 1612 ncols = ai[rip[k]+1] - ai[rip[k]]; 1613 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1614 ncols_upper = 0; 1615 for (j=0; j<ncols; j++){ 1616 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1617 if (riip[i] >= k){ /* only take upper triangular entry */ 1618 ajtmp[ncols_upper] = i; 1619 ncols_upper++; 1620 } 1621 } 1622 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1623 nzk += nlnk; 1624 1625 /* update lnk by computing fill-in for each pivot row to be merged in */ 1626 prow = jl[k]; /* 1st pivot row */ 1627 1628 while (prow < k){ 1629 nextprow = jl[prow]; 1630 1631 /* merge prow into k-th row */ 1632 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1633 jmax = ui[prow+1]; 1634 ncols = jmax-jmin; 1635 i = jmin - ui[prow]; 1636 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1637 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1638 j = *(uj - 1); 1639 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1640 nzk += nlnk; 1641 1642 /* update il and jl for prow */ 1643 if (jmin < jmax){ 1644 il[prow] = jmin; 1645 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1646 } 1647 prow = nextprow; 1648 } 1649 1650 /* if free space is not available, make more free space */ 1651 if (current_space->local_remaining<nzk) { 1652 i = am - k + 1; /* num of unfactored rows */ 1653 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1654 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1655 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1656 reallocs++; 1657 } 1658 1659 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1660 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1661 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1662 1663 /* add the k-th row into il and jl */ 1664 if (nzk > 1){ 1665 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1666 jl[k] = jl[i]; jl[i] = k; 1667 il[k] = ui[k] + 1; 1668 } 1669 uj_ptr[k] = current_space->array; 1670 uj_lvl_ptr[k] = current_space_lvl->array; 1671 1672 current_space->array += nzk; 1673 current_space->local_used += nzk; 1674 current_space->local_remaining -= nzk; 1675 1676 current_space_lvl->array += nzk; 1677 current_space_lvl->local_used += nzk; 1678 current_space_lvl->local_remaining -= nzk; 1679 1680 ui[k+1] = ui[k] + nzk; 1681 } 1682 1683 #if defined(PETSC_USE_INFO) 1684 if (ai[am] != 0) { 1685 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1686 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1687 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1688 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1689 } else { 1690 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1691 } 1692 #endif 1693 1694 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1695 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1696 ierr = PetscFree(jl);CHKERRQ(ierr); 1697 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1698 1699 /* destroy list of free space and other temporary array(s) */ 1700 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1701 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1702 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1703 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1704 1705 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1706 1707 /* put together the new matrix in MATSEQSBAIJ format */ 1708 1709 b = (Mat_SeqSBAIJ*)(fact)->data; 1710 b->singlemalloc = PETSC_FALSE; 1711 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1712 b->j = uj; 1713 b->i = ui; 1714 b->diag = 0; 1715 b->ilen = 0; 1716 b->imax = 0; 1717 b->row = perm; 1718 b->col = perm; 1719 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1720 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1721 b->icol = iperm; 1722 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1723 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1724 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1725 b->maxnz = b->nz = ui[am]; 1726 b->free_a = PETSC_TRUE; 1727 b->free_ij = PETSC_TRUE; 1728 1729 (fact)->info.factor_mallocs = reallocs; 1730 (fact)->info.fill_ratio_given = fill; 1731 if (ai[am] != 0) { 1732 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1733 } else { 1734 (fact)->info.fill_ratio_needed = 0.0; 1735 } 1736 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 #undef __FUNCT__ 1741 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1742 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1743 { 1744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1745 Mat_SeqSBAIJ *b; 1746 PetscErrorCode ierr; 1747 PetscTruth perm_identity; 1748 PetscReal fill = info->fill; 1749 const PetscInt *rip,*riip; 1750 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1751 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1752 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1753 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1754 PetscBT lnkbt; 1755 IS iperm; 1756 1757 PetscFunctionBegin; 1758 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1759 /* check whether perm is the identity mapping */ 1760 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1761 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1762 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1763 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1764 1765 /* initialization */ 1766 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1767 ui[0] = 0; 1768 1769 /* jl: linked list for storing indices of the pivot rows 1770 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1771 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1772 il = jl + am; 1773 cols = il + am; 1774 ui_ptr = (PetscInt**)(cols + am); 1775 for (i=0; i<am; i++){ 1776 jl[i] = am; il[i] = 0; 1777 } 1778 1779 /* create and initialize a linked list for storing column indices of the active row k */ 1780 nlnk = am + 1; 1781 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1782 1783 /* initial FreeSpace size is fill*(ai[am]+1) */ 1784 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1785 current_space = free_space; 1786 1787 for (k=0; k<am; k++){ /* for each active row k */ 1788 /* initialize lnk by the column indices of row rip[k] of A */ 1789 nzk = 0; 1790 ncols = ai[rip[k]+1] - ai[rip[k]]; 1791 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1792 ncols_upper = 0; 1793 for (j=0; j<ncols; j++){ 1794 i = riip[*(aj + ai[rip[k]] + j)]; 1795 if (i >= k){ /* only take upper triangular entry */ 1796 cols[ncols_upper] = i; 1797 ncols_upper++; 1798 } 1799 } 1800 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1801 nzk += nlnk; 1802 1803 /* update lnk by computing fill-in for each pivot row to be merged in */ 1804 prow = jl[k]; /* 1st pivot row */ 1805 1806 while (prow < k){ 1807 nextprow = jl[prow]; 1808 /* merge prow into k-th row */ 1809 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1810 jmax = ui[prow+1]; 1811 ncols = jmax-jmin; 1812 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1813 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1814 nzk += nlnk; 1815 1816 /* update il and jl for prow */ 1817 if (jmin < jmax){ 1818 il[prow] = jmin; 1819 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1820 } 1821 prow = nextprow; 1822 } 1823 1824 /* if free space is not available, make more free space */ 1825 if (current_space->local_remaining<nzk) { 1826 i = am - k + 1; /* num of unfactored rows */ 1827 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1828 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1829 reallocs++; 1830 } 1831 1832 /* copy data into free space, then initialize lnk */ 1833 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1834 1835 /* add the k-th row into il and jl */ 1836 if (nzk-1 > 0){ 1837 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1838 jl[k] = jl[i]; jl[i] = k; 1839 il[k] = ui[k] + 1; 1840 } 1841 ui_ptr[k] = current_space->array; 1842 current_space->array += nzk; 1843 current_space->local_used += nzk; 1844 current_space->local_remaining -= nzk; 1845 1846 ui[k+1] = ui[k] + nzk; 1847 } 1848 1849 #if defined(PETSC_USE_INFO) 1850 if (ai[am] != 0) { 1851 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1852 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1853 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1854 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1855 } else { 1856 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1857 } 1858 #endif 1859 1860 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1861 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1862 ierr = PetscFree(jl);CHKERRQ(ierr); 1863 1864 /* destroy list of free space and other temporary array(s) */ 1865 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1866 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1867 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1868 1869 /* put together the new matrix in MATSEQSBAIJ format */ 1870 1871 b = (Mat_SeqSBAIJ*)(fact)->data; 1872 b->singlemalloc = PETSC_FALSE; 1873 b->free_a = PETSC_TRUE; 1874 b->free_ij = PETSC_TRUE; 1875 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1876 b->j = uj; 1877 b->i = ui; 1878 b->diag = 0; 1879 b->ilen = 0; 1880 b->imax = 0; 1881 b->row = perm; 1882 b->col = perm; 1883 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1884 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1885 b->icol = iperm; 1886 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1887 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1888 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1889 b->maxnz = b->nz = ui[am]; 1890 1891 (fact)->info.factor_mallocs = reallocs; 1892 (fact)->info.fill_ratio_given = fill; 1893 if (ai[am] != 0) { 1894 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1895 } else { 1896 (fact)->info.fill_ratio_needed = 0.0; 1897 } 1898 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1899 PetscFunctionReturn(0); 1900 } 1901 1902 #undef __FUNCT__ 1903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt" 1904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx) 1905 { 1906 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1907 PetscErrorCode ierr; 1908 PetscInt n = A->rmap->n; 1909 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 1910 PetscScalar *x,sum; 1911 const PetscScalar *b; 1912 const MatScalar *aa = a->a,*v; 1913 PetscInt i,nz; 1914 1915 PetscFunctionBegin; 1916 if (!n) PetscFunctionReturn(0); 1917 1918 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1919 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1920 1921 /* forward solve the lower triangular */ 1922 x[0] = b[0]; 1923 v = aa; 1924 vi = aj; 1925 for (i=1; i<n; i++) { 1926 nz = ai[i+1] - ai[i]; 1927 sum = b[i]; 1928 while (nz--) sum -= *v++ * x[*vi++]; 1929 x[i] = sum; 1930 } 1931 1932 /* backward solve the upper triangular */ 1933 v = aa + adiag[n] + 1; 1934 vi = aj + adiag[n] + 1; 1935 for (i=n-1; i>=0; i--){ 1936 nz = adiag[i] - adiag[i+1] - 1; 1937 sum = x[i]; 1938 while (nz--) sum -= *v++ * x[*vi++]; 1939 x[i] = sum*aa[adiag[i]]; 1940 v++; vi++; 1941 } 1942 1943 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1944 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1945 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1946 PetscFunctionReturn(0); 1947 } 1948 1949 #undef __FUNCT__ 1950 #define __FUNCT__ "MatSolve_SeqAIJ_iludt" 1951 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx) 1952 { 1953 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1954 IS iscol = a->col,isrow = a->row; 1955 PetscErrorCode ierr; 1956 PetscInt i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag; 1957 PetscInt nz; 1958 const PetscInt *rout,*cout,*r,*c; 1959 PetscScalar *x,*tmp,*tmps; 1960 const PetscScalar *b; 1961 const MatScalar *aa = a->a,*v; 1962 1963 PetscFunctionBegin; 1964 if (!n) PetscFunctionReturn(0); 1965 1966 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1968 tmp = a->solve_work; 1969 1970 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1971 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1972 1973 /* forward solve the lower triangular */ 1974 tmp[0] = b[*r++]; 1975 tmps = tmp; 1976 v = aa; 1977 vi = aj; 1978 for (i=1; i<n; i++) { 1979 nz = ai[i+1] - ai[i]; 1980 tmp[i] = b[*r++]; 1981 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1982 v += nz; vi += nz; 1983 } 1984 1985 /* backward solve the upper triangular */ 1986 v = aa + adiag[n] + 1; 1987 vi = aj + adiag[n] + 1; 1988 for (i=n-1; i>=0; i--){ 1989 nz = adiag[i] - adiag[i+1] - 1; 1990 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1991 x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]]; 1992 v += nz+1; vi += nz+1; 1993 } 1994 1995 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1996 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1997 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1999 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 2000 PetscFunctionReturn(0); 2001 } 2002 2003 #undef __FUNCT__ 2004 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 2005 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 2006 { 2007 Mat B = *fact; 2008 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 2009 IS isicol; 2010 PetscErrorCode ierr; 2011 const PetscInt *r,*ic; 2012 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 2013 PetscInt *bi,*bj,*bdiag; 2014 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 2015 PetscInt nlnk,*lnk; 2016 PetscBT lnkbt; 2017 PetscTruth row_identity,icol_identity,both_identity; 2018 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp; 2019 const PetscInt *ics; 2020 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 2021 PetscReal dt=info->dt,shift=info->shiftinblocks; 2022 PetscInt nnz_max; 2023 PetscTruth missing; 2024 2025 PetscFunctionBegin; 2026 /* ------- symbolic factorization, can be reused ---------*/ 2027 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2028 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2029 adiag=a->diag; 2030 2031 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2032 2033 /* bdiag is location of diagonal in factor */ 2034 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 2035 2036 /* allocate row pointers bi */ 2037 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 2038 2039 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 2040 dtcount = (PetscInt)info->dtcount; 2041 nnz_max = ai[n]+2*n*dtcount+2; 2042 ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2043 ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr); 2044 2045 /* put together the new matrix */ 2046 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2047 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 2048 b = (Mat_SeqAIJ*)(B)->data; 2049 b->free_a = PETSC_TRUE; 2050 b->free_ij = PETSC_TRUE; 2051 b->singlemalloc = PETSC_FALSE; 2052 b->a = ba; 2053 b->j = bj; 2054 b->i = bi; 2055 b->diag = bdiag; 2056 b->ilen = 0; 2057 b->imax = 0; 2058 b->row = isrow; 2059 b->col = iscol; 2060 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2061 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2062 b->icol = isicol; 2063 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2064 2065 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2066 b->maxnz = nnz_max; 2067 2068 (B)->factor = MAT_FACTOR_ILUDT; 2069 (B)->info.factor_mallocs = 0; 2070 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 2071 CHKMEMQ; 2072 /* ------- end of symbolic factorization ---------*/ 2073 2074 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2075 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2076 ics = ic; 2077 2078 /* linked list for storing column indices of the active row */ 2079 nlnk = n + 1; 2080 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2081 2082 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 2083 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 2084 jtmp = im + n; 2085 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 2086 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2087 vtmp = rtmp + n; 2088 2089 bi[0] = 0; 2090 bdiag[0] = nnz_max-1; /* location of diagonal in factor B */ 2091 for (i=0; i<n; i++) { 2092 /* printf("row %d ...\n",i); */ 2093 /* copy initial fill into linked list */ 2094 nzi = 0; /* nonzeros for active row i */ 2095 nzi = ai[r[i]+1] - ai[r[i]]; 2096 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 2097 nzi_al = adiag[r[i]] - ai[r[i]]; 2098 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 2099 ajtmp = aj + ai[r[i]]; 2100 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2101 2102 /* load in initial (unfactored row) */ 2103 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2104 aatmp = a->a + ai[r[i]]; 2105 for (j=0; j<nzi; j++) { 2106 rtmp[ics[*ajtmp++]] = *aatmp++; 2107 } 2108 2109 /* add pivot rows into linked list */ 2110 row = lnk[n]; 2111 while (row < i) { 2112 nzi_bl = bi[row+1] - bi[row] + 1; 2113 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 2114 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 2115 nzi += nlnk; 2116 row = lnk[row]; 2117 } 2118 2119 /* copy data from lnk into jtmp, then initialize lnk */ 2120 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 2121 2122 /* numerical factorization */ 2123 bjtmp = jtmp; 2124 row = *bjtmp++; /* 1st pivot row */ 2125 while (row < i) { 2126 //printf(" prow %d\n",row); 2127 pc = rtmp + row; 2128 /* apply tolerance dropping rule */ 2129 if (PetscAbsScalar(*pc) > dt){ 2130 pv = ba + bdiag[row]; /* diag of the pivot row */ 2131 multiplier = *pc / *pv; 2132 *pc = multiplier; 2133 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2134 pv = ba + bdiag[row+1] + 1; 2135 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 2136 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2137 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2138 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 2139 } 2140 row = *bjtmp++; 2141 } 2142 2143 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 2144 /* printf("row %d:\n",i); */ 2145 nzi_bl = 0; j = 0; 2146 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 2147 vtmp[j] = rtmp[jtmp[j]]; 2148 nzi_bl++; j++; 2149 } 2150 nzi_bu = nzi - nzi_bl -1; 2151 while (j < nzi){ 2152 vtmp[j] = rtmp[jtmp[j]]; 2153 j++; 2154 } 2155 2156 bjtmp = bj + bi[i]; 2157 batmp = ba + bi[i]; 2158 /* apply level dropping rule to L part */ 2159 ncut = nzi_al + dtcount; 2160 if (ncut < nzi_bl){ 2161 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 2162 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 2163 } else { 2164 ncut = nzi_bl; 2165 } 2166 for (j=0; j<ncut; j++){ 2167 bjtmp[j] = jtmp[j]; 2168 batmp[j] = vtmp[j]; 2169 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 2170 } 2171 bi[i+1] = bi[i] + ncut; 2172 nzi = ncut + 1; 2173 2174 /* apply level dropping rule to U part */ 2175 ncut = nzi_au + dtcount; 2176 if (ncut < nzi_bu){ 2177 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 2178 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 2179 } else { 2180 ncut = nzi_bu; 2181 } 2182 nzi += ncut; 2183 2184 /* mark bdiagonal */ 2185 bdiag[i+1] = bdiag[i] - (ncut + 1); 2186 bjtmp = bj + bdiag[i]; 2187 batmp = ba + bdiag[i]; 2188 *bjtmp = i; 2189 *batmp = rtmp[i]; 2190 /* printf(" diag (%d,%g),",*bjtmp,*batmp); */ 2191 2192 2193 bjtmp = bj + bdiag[i+1]+1; 2194 batmp = ba + bdiag[i+1]+1; 2195 for (k=0; k<ncut; k++){ 2196 bjtmp[k] = jtmp[nzi_bl+1+k]; 2197 batmp[k] = vtmp[nzi_bl+1+k]; 2198 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 2199 } 2200 2201 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 2202 /* 2203 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 2204 printf(" ----------------------------\n"); 2205 */ 2206 } /* for (i=0; i<n; i++) */ 2207 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 2208 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 2209 2210 /* invert diagonal entries for simplier triangular solves */ 2211 /* printf(" diag:\n"); */ 2212 for (i=0; i<n; i++) { 2213 if (ba[bdiag[i]] == 0.0) { 2214 /* SHIFT is applied to SINGLE diagonal entry; does this make any sense? */ 2215 ba[bdiag[i]] = 1.0/shift; 2216 } else { 2217 ba[bdiag[i]] = 1.0/ba[bdiag[i]]; 2218 } 2219 } 2220 2221 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2222 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2223 2224 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2225 ierr = PetscFree(im);CHKERRQ(ierr); 2226 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2227 2228 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 2229 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 2230 2231 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2232 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 2233 both_identity = (PetscTruth) (row_identity && icol_identity); 2234 if (row_identity && icol_identity) { 2235 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt; 2236 } else { 2237 B->ops->solve = MatSolve_SeqAIJ_iludt; 2238 } 2239 2240 B->ops->solveadd = 0; 2241 B->ops->solvetranspose = 0; 2242 B->ops->solvetransposeadd = 0; 2243 B->ops->matsolve = 0; 2244 B->assembled = PETSC_TRUE; 2245 B->preallocated = PETSC_TRUE; 2246 PetscFunctionReturn(0); 2247 } 2248 2249