1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,MatScalar*,PetscInt*,MatScalar*,PetscInt*,PetscInt*,MatScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 const PetscInt *c,*r,*ic; 64 PetscInt i,n = A->rmap->n,*cc,*cr; 65 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 66 PetscInt *ordcol,*iwk,*iperm,*jw; 67 PetscInt jmax,lfill,job,*o_i,*o_j; 68 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 69 PetscReal af,dt,dtcount,dtcol,fill; 70 71 PetscFunctionBegin; 72 73 if (info->dt == PETSC_DEFAULT) dt = .005; else dt = info->dt; 74 if (info->dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); else dtcount = info->dtcount; 75 if (info->dtcol == PETSC_DEFAULT) dtcol = .01; else dtcol = info->dtcol; 76 if (info->fill == PETSC_DEFAULT) fill = ((double)(n*(dtcount+1)))/a->nz; else fill = info->fill; 77 lfill = (PetscInt)(dtcount/2.0); 78 jmax = (PetscInt)(fill*a->nz); 79 80 81 /* ------------------------------------------------------------ 82 If reorder=.TRUE., then the original matrix has to be 83 reordered to reflect the user selected ordering scheme, and 84 then de-reordered so it is in it's original format. 85 Because Saad's dperm() is NOT in place, we have to copy 86 the original matrix and allocate more storage. . . 87 ------------------------------------------------------------ 88 */ 89 90 /* set reorder to true if either isrow or iscol is not identity */ 91 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 92 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 93 reorder = PetscNot(reorder); 94 95 96 /* storage for ilu factor */ 97 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 99 ierr = PetscMalloc(jmax*sizeof(MatScalar),&new_a);CHKERRQ(ierr); 100 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 101 102 /* ------------------------------------------------------------ 103 Make sure that everything is Fortran formatted (1-Based) 104 ------------------------------------------------------------ 105 */ 106 for (i=old_i[0];i<old_i[n];i++) { 107 old_j[i]++; 108 } 109 for(i=0;i<n+1;i++) { 110 old_i[i]++; 111 }; 112 113 114 if (reorder) { 115 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 116 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 117 ierr = PetscMalloc2(n,PetscInt,&cc,n,PetscInt,&cr);CHKERRQ(ierr); 118 for(i=0;i<n;i++) { 119 cr[i] = r[i]+1; 120 cc[i] = c[i]+1; 121 } 122 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 123 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 124 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 125 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 126 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(MatScalar),&old_a2);CHKERRQ(ierr); 127 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,cr,cc,&job); 128 ierr = PetscFree2(cc,cr);CHKERRQ(ierr); 129 o_a = old_a2; 130 o_j = old_j2; 131 o_i = old_i2; 132 } else { 133 o_a = old_a; 134 o_j = old_j; 135 o_i = old_i; 136 } 137 138 /* ------------------------------------------------------------ 139 Call Saad's ilutp() routine to generate the factorization 140 ------------------------------------------------------------ 141 */ 142 143 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 145 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 146 147 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)dt,&dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 148 if (sierr) { 149 switch (sierr) { 150 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger fill current fill %G space allocated %D",fill,jmax); 151 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger fill current fill %G space allocated %D",fill,jmax); 152 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 153 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 154 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal fill value %D",jmax); 155 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 156 } 157 } 158 159 ierr = PetscFree(w);CHKERRQ(ierr); 160 ierr = PetscFree(jw);CHKERRQ(ierr); 161 162 /* ------------------------------------------------------------ 163 Saad's routine gives the result in Modified Sparse Row (msr) 164 Convert to Compressed Sparse Row format (csr) 165 ------------------------------------------------------------ 166 */ 167 168 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 169 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 170 171 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 172 173 ierr = PetscFree(iwk);CHKERRQ(ierr); 174 ierr = PetscFree(wk);CHKERRQ(ierr); 175 176 if (reorder) { 177 ierr = PetscFree(old_a2);CHKERRQ(ierr); 178 ierr = PetscFree(old_j2);CHKERRQ(ierr); 179 ierr = PetscFree(old_i2);CHKERRQ(ierr); 180 } else { 181 /* fix permutation of old_j that the factorization introduced */ 182 for (i=old_i[0]; i<old_i[n]; i++) { 183 old_j[i-1] = iperm[old_j[i-1]-1]; 184 } 185 } 186 187 /* get rid of the shift to indices starting at 1 */ 188 for (i=0; i<n+1; i++) { 189 old_i[i]--; 190 } 191 for (i=old_i[0];i<old_i[n];i++) { 192 old_j[i]--; 193 } 194 195 /* Make the factored matrix 0-based */ 196 for (i=0; i<n+1; i++) { 197 new_i[i]--; 198 } 199 for (i=new_i[0];i<new_i[n];i++) { 200 new_j[i]--; 201 } 202 203 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 204 /*-- permute the right-hand-side and solution vectors --*/ 205 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 206 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 207 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 208 for(i=0; i<n; i++) { 209 ordcol[i] = ic[iperm[i]-1]; 210 }; 211 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 212 ierr = ISDestroy(isicol);CHKERRQ(ierr); 213 214 ierr = PetscFree(iperm);CHKERRQ(ierr); 215 216 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 217 ierr = PetscFree(ordcol);CHKERRQ(ierr); 218 219 /*----- put together the new matrix -----*/ 220 221 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 222 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 223 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 224 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 225 (*fact)->factor = MAT_FACTOR_LU; 226 (*fact)->assembled = PETSC_TRUE; 227 228 b = (Mat_SeqAIJ*)(*fact)->data; 229 b->free_a = PETSC_TRUE; 230 b->free_ij = PETSC_TRUE; 231 b->singlemalloc = PETSC_FALSE; 232 b->a = new_a; 233 b->j = new_j; 234 b->i = new_i; 235 b->ilen = 0; 236 b->imax = 0; 237 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 238 b->row = isirow; 239 b->col = iscolf; 240 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 241 b->maxnz = b->nz = new_i[n]; 242 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 243 (*fact)->info.factor_mallocs = 0; 244 245 af = ((double)b->nz)/((double)a->nz) + .001; 246 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",fill,af);CHKERRQ(ierr); 247 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 249 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 250 251 if (reorder) (*fact)->ops->solve = MatSolve_SeqAIJ; 252 else (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 253 (*fact)->ops->solveadd = MatSolveAdd_SeqAIJ; 254 (*fact)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 255 (*fact)->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 256 257 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 258 259 PetscFunctionReturn(0); 260 #endif 261 } 262 263 EXTERN_C_BEGIN 264 #undef __FUNCT__ 265 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 266 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 267 { 268 PetscFunctionBegin; 269 *flg = PETSC_TRUE; 270 PetscFunctionReturn(0); 271 } 272 EXTERN_C_END 273 274 EXTERN_C_BEGIN 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 277 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 278 { 279 PetscInt n = A->rmap->n; 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 284 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 285 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 286 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 287 if (ftype == MAT_FACTOR_ILU) { 288 (*B)->ops->ilufactorsymbolic= MatILUFactorSymbolic_SeqAIJ; 289 } else if(MAT_FACTOR_LU) { 290 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 291 } else { 292 (*B)->ops->iludtfactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 293 } 294 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 295 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 296 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 297 if (ftype == MAT_FACTOR_ICC) { 298 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 299 } else { 300 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 301 } 302 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 303 (*B)->factor = ftype; 304 PetscFunctionReturn(0); 305 } 306 EXTERN_C_END 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 310 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 311 { 312 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 313 IS isicol; 314 PetscErrorCode ierr; 315 const PetscInt *r,*ic; 316 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 317 PetscInt *bi,*bj,*ajtmp; 318 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 319 PetscReal f; 320 PetscInt nlnk,*lnk,k,**bi_ptr; 321 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 322 PetscBT lnkbt; 323 324 PetscFunctionBegin; 325 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 326 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 327 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 328 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 329 330 /* get new row pointers */ 331 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 332 bi[0] = 0; 333 334 /* bdiag is location of diagonal in factor */ 335 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 336 bdiag[0] = 0; 337 338 /* linked list for storing column indices of the active row */ 339 nlnk = n + 1; 340 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 341 342 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 343 344 /* initial FreeSpace size is f*(ai[n]+1) */ 345 f = info->fill; 346 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 347 current_space = free_space; 348 349 for (i=0; i<n; i++) { 350 /* copy previous fill into linked list */ 351 nzi = 0; 352 nnz = ai[r[i]+1] - ai[r[i]]; 353 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 354 ajtmp = aj + ai[r[i]]; 355 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 356 nzi += nlnk; 357 358 /* add pivot rows into linked list */ 359 row = lnk[n]; 360 while (row < i) { 361 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 362 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 363 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 364 nzi += nlnk; 365 row = lnk[row]; 366 } 367 bi[i+1] = bi[i] + nzi; 368 im[i] = nzi; 369 370 /* mark bdiag */ 371 nzbd = 0; 372 nnz = nzi; 373 k = lnk[n]; 374 while (nnz-- && k < i){ 375 nzbd++; 376 k = lnk[k]; 377 } 378 bdiag[i] = bi[i] + nzbd; 379 380 /* if free space is not available, make more free space */ 381 if (current_space->local_remaining<nzi) { 382 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 383 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 384 reallocs++; 385 } 386 387 /* copy data into free space, then initialize lnk */ 388 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 389 bi_ptr[i] = current_space->array; 390 current_space->array += nzi; 391 current_space->local_used += nzi; 392 current_space->local_remaining -= nzi; 393 } 394 #if defined(PETSC_USE_INFO) 395 if (ai[n] != 0) { 396 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 398 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 399 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 400 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 401 } else { 402 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 403 } 404 #endif 405 406 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 407 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 408 409 /* destroy list of free space and other temporary array(s) */ 410 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 411 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 412 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 413 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 414 415 /* put together the new matrix */ 416 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 417 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 418 b = (Mat_SeqAIJ*)(B)->data; 419 b->free_a = PETSC_TRUE; 420 b->free_ij = PETSC_TRUE; 421 b->singlemalloc = PETSC_FALSE; 422 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 423 b->j = bj; 424 b->i = bi; 425 b->diag = bdiag; 426 b->ilen = 0; 427 b->imax = 0; 428 b->row = isrow; 429 b->col = iscol; 430 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 431 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 432 b->icol = isicol; 433 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 434 435 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 436 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 437 b->maxnz = b->nz = bi[n] ; 438 439 (B)->factor = MAT_FACTOR_LU; 440 (B)->info.factor_mallocs = reallocs; 441 (B)->info.fill_ratio_given = f; 442 443 if (ai[n]) { 444 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 445 } else { 446 (B)->info.fill_ratio_needed = 0.0; 447 } 448 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 449 (B)->ops->solve = MatSolve_SeqAIJ; 450 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 451 /* switch to inodes if appropriate */ 452 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 453 PetscFunctionReturn(0); 454 } 455 456 /* 457 Trouble in factorization, should we dump the original matrix? 458 */ 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatFactorDumpMatrix" 461 PetscErrorCode MatFactorDumpMatrix(Mat A) 462 { 463 PetscErrorCode ierr; 464 PetscTruth flg = PETSC_FALSE; 465 466 PetscFunctionBegin; 467 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 468 if (flg) { 469 PetscViewer viewer; 470 char filename[PETSC_MAX_PATH_LEN]; 471 472 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 473 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 474 ierr = MatView(A,viewer);CHKERRQ(ierr); 475 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 476 } 477 PetscFunctionReturn(0); 478 } 479 480 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 481 482 /* ----------------------------------------------------------- */ 483 #undef __FUNCT__ 484 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 485 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 486 { 487 Mat C=B; 488 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 489 IS isrow = b->row,isicol = b->icol; 490 PetscErrorCode ierr; 491 const PetscInt *r,*ic,*ics; 492 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 493 PetscInt *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj; 494 MatScalar *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a; 495 PetscReal rs = 0.0; 496 LUShift_Ctx sctx; 497 PetscInt newshift,*ddiag; 498 499 PetscFunctionBegin; 500 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 501 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 502 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 503 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 504 ics = ic; 505 506 sctx.shift_top = 0; 507 sctx.nshift_max = 0; 508 sctx.shift_lo = 0; 509 sctx.shift_hi = 0; 510 sctx.shift_fraction = 0; 511 512 /* if both shift schemes are chosen by user, only use info->shiftpd */ 513 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 514 ddiag = a->diag; 515 sctx.shift_top = info->zeropivot; 516 for (i=0; i<n; i++) { 517 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 518 d = (aa)[ddiag[i]]; 519 rs = -PetscAbsScalar(d) - PetscRealPart(d); 520 v = aa+ai[i]; 521 nz = ai[i+1] - ai[i]; 522 for (j=0; j<nz; j++) 523 rs += PetscAbsScalar(v[j]); 524 if (rs>sctx.shift_top) sctx.shift_top = rs; 525 } 526 sctx.shift_top *= 1.1; 527 sctx.nshift_max = 5; 528 sctx.shift_lo = 0.; 529 sctx.shift_hi = 1.; 530 } 531 532 sctx.shift_amount = 0.0; 533 sctx.nshift = 0; 534 do { 535 sctx.lushift = PETSC_FALSE; 536 for (i=0; i<n; i++){ 537 nz = bi[i+1] - bi[i]; 538 bjtmp = bj + bi[i]; 539 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 540 541 /* load in initial (unfactored row) */ 542 nz = ai[r[i]+1] - ai[r[i]]; 543 ajtmp = aj + ai[r[i]]; 544 v = aa + ai[r[i]]; 545 for (j=0; j<nz; j++) { 546 rtmp[ics[ajtmp[j]]] = v[j]; 547 } 548 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 549 550 row = *bjtmp++; 551 while (row < i) { 552 pc = rtmp + row; 553 if (*pc != 0.0) { 554 pv = b->a + diag_offset[row]; 555 pj = b->j + diag_offset[row] + 1; 556 multiplier = *pc / *pv++; 557 *pc = multiplier; 558 nz = bi[row+1] - diag_offset[row] - 1; 559 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 560 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 561 } 562 row = *bjtmp++; 563 } 564 /* finished row so stick it into b->a */ 565 pv = b->a + bi[i] ; 566 pj = b->j + bi[i] ; 567 nz = bi[i+1] - bi[i]; 568 diag = diag_offset[i] - bi[i]; 569 rs = -PetscAbsScalar(pv[diag]); 570 for (j=0; j<nz; j++) { 571 pv[j] = rtmp[pj[j]]; 572 rs += PetscAbsScalar(pv[j]); 573 } 574 575 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 576 sctx.rs = rs; 577 sctx.pv = pv[diag]; 578 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 579 if (newshift == 1) break; 580 } 581 582 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 583 /* 584 * if no shift in this attempt & shifting & started shifting & can refine, 585 * then try lower shift 586 */ 587 sctx.shift_hi = sctx.shift_fraction; 588 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 589 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 590 sctx.lushift = PETSC_TRUE; 591 sctx.nshift++; 592 } 593 } while (sctx.lushift); 594 595 /* invert diagonal entries for simplier triangular solves */ 596 for (i=0; i<n; i++) { 597 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 598 } 599 ierr = PetscFree(rtmp);CHKERRQ(ierr); 600 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 601 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 602 if (b->inode.use) { 603 C->ops->solve = MatSolve_Inode; 604 } else { 605 PetscTruth row_identity, col_identity; 606 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 607 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 608 if (row_identity && col_identity) { 609 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 610 } else { 611 C->ops->solve = MatSolve_SeqAIJ; 612 } 613 } 614 C->ops->solveadd = MatSolveAdd_SeqAIJ; 615 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 616 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 617 C->ops->matsolve = MatMatSolve_SeqAIJ; 618 C->assembled = PETSC_TRUE; 619 C->preallocated = PETSC_TRUE; 620 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 621 if (sctx.nshift){ 622 if (info->shiftpd) { 623 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 624 } else if (info->shiftnz) { 625 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 626 } 627 } 628 PetscFunctionReturn(0); 629 } 630 631 /* 632 This routine implements inplace ILU(0) with row or/and column permutations. 633 Input: 634 A - original matrix 635 Output; 636 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 637 a->j (col index) is permuted by the inverse of colperm, then sorted 638 a->a reordered accordingly with a->j 639 a->diag (ptr to diagonal elements) is updated. 640 */ 641 #undef __FUNCT__ 642 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 643 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 644 { 645 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 646 IS isrow = a->row,isicol = a->icol; 647 PetscErrorCode ierr; 648 const PetscInt *r,*ic,*ics; 649 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 650 PetscInt *ajtmp,nz,row; 651 PetscInt *diag = a->diag,nbdiag,*pj; 652 PetscScalar *rtmp,*pc,multiplier,d; 653 MatScalar *v,*pv; 654 PetscReal rs; 655 LUShift_Ctx sctx; 656 PetscInt newshift; 657 658 PetscFunctionBegin; 659 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 660 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 661 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 662 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 663 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 664 ics = ic; 665 666 sctx.shift_top = 0; 667 sctx.nshift_max = 0; 668 sctx.shift_lo = 0; 669 sctx.shift_hi = 0; 670 sctx.shift_fraction = 0; 671 672 /* if both shift schemes are chosen by user, only use info->shiftpd */ 673 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 674 sctx.shift_top = 0; 675 for (i=0; i<n; i++) { 676 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 677 d = (a->a)[diag[i]]; 678 rs = -PetscAbsScalar(d) - PetscRealPart(d); 679 v = a->a+ai[i]; 680 nz = ai[i+1] - ai[i]; 681 for (j=0; j<nz; j++) 682 rs += PetscAbsScalar(v[j]); 683 if (rs>sctx.shift_top) sctx.shift_top = rs; 684 } 685 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 686 sctx.shift_top *= 1.1; 687 sctx.nshift_max = 5; 688 sctx.shift_lo = 0.; 689 sctx.shift_hi = 1.; 690 } 691 692 sctx.shift_amount = 0; 693 sctx.nshift = 0; 694 do { 695 sctx.lushift = PETSC_FALSE; 696 for (i=0; i<n; i++){ 697 /* load in initial unfactored row */ 698 nz = ai[r[i]+1] - ai[r[i]]; 699 ajtmp = aj + ai[r[i]]; 700 v = a->a + ai[r[i]]; 701 /* sort permuted ajtmp and values v accordingly */ 702 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 703 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 704 705 diag[r[i]] = ai[r[i]]; 706 for (j=0; j<nz; j++) { 707 rtmp[ajtmp[j]] = v[j]; 708 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 709 } 710 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 711 712 row = *ajtmp++; 713 while (row < i) { 714 pc = rtmp + row; 715 if (*pc != 0.0) { 716 pv = a->a + diag[r[row]]; 717 pj = aj + diag[r[row]] + 1; 718 719 multiplier = *pc / *pv++; 720 *pc = multiplier; 721 nz = ai[r[row]+1] - diag[r[row]] - 1; 722 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 723 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 724 } 725 row = *ajtmp++; 726 } 727 /* finished row so overwrite it onto a->a */ 728 pv = a->a + ai[r[i]] ; 729 pj = aj + ai[r[i]] ; 730 nz = ai[r[i]+1] - ai[r[i]]; 731 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 732 733 rs = 0.0; 734 for (j=0; j<nz; j++) { 735 pv[j] = rtmp[pj[j]]; 736 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 737 } 738 739 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 740 sctx.rs = rs; 741 sctx.pv = pv[nbdiag]; 742 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 743 if (newshift == 1) break; 744 } 745 746 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 747 /* 748 * if no shift in this attempt & shifting & started shifting & can refine, 749 * then try lower shift 750 */ 751 sctx.shift_hi = sctx.shift_fraction; 752 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 753 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 754 sctx.lushift = PETSC_TRUE; 755 sctx.nshift++; 756 } 757 } while (sctx.lushift); 758 759 /* invert diagonal entries for simplier triangular solves */ 760 for (i=0; i<n; i++) { 761 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 762 } 763 764 ierr = PetscFree(rtmp);CHKERRQ(ierr); 765 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 766 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 767 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 768 A->ops->solveadd = MatSolveAdd_SeqAIJ; 769 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 770 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 771 A->assembled = PETSC_TRUE; 772 A->preallocated = PETSC_TRUE; 773 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 774 if (sctx.nshift){ 775 if (info->shiftpd) { 776 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 777 } else if (info->shiftnz) { 778 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 779 } 780 } 781 PetscFunctionReturn(0); 782 } 783 784 /* ----------------------------------------------------------- */ 785 #undef __FUNCT__ 786 #define __FUNCT__ "MatLUFactor_SeqAIJ" 787 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 788 { 789 PetscErrorCode ierr; 790 Mat C; 791 792 PetscFunctionBegin; 793 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 794 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 795 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 796 A->ops->solve = C->ops->solve; 797 A->ops->solvetranspose = C->ops->solvetranspose; 798 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 799 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 /* ----------------------------------------------------------- */ 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatSolve_SeqAIJ" 805 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 806 { 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 808 IS iscol = a->col,isrow = a->row; 809 PetscErrorCode ierr; 810 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 811 PetscInt nz; 812 const PetscInt *rout,*cout,*r,*c; 813 PetscScalar *x,*tmp,*tmps,sum; 814 const PetscScalar *b; 815 const MatScalar *aa = a->a,*v; 816 817 PetscFunctionBegin; 818 if (!n) PetscFunctionReturn(0); 819 820 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 821 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 822 tmp = a->solve_work; 823 824 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 825 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 826 827 /* forward solve the lower triangular */ 828 tmp[0] = b[*r++]; 829 tmps = tmp; 830 for (i=1; i<n; i++) { 831 v = aa + ai[i] ; 832 vi = aj + ai[i] ; 833 nz = a->diag[i] - ai[i]; 834 sum = b[*r++]; 835 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 836 tmp[i] = sum; 837 } 838 839 /* backward solve the upper triangular */ 840 for (i=n-1; i>=0; i--){ 841 v = aa + a->diag[i] + 1; 842 vi = aj + a->diag[i] + 1; 843 nz = ai[i+1] - a->diag[i] - 1; 844 sum = tmp[i]; 845 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 846 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 847 } 848 849 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 850 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 851 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 852 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 853 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatMatSolve_SeqAIJ" 859 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 860 { 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 862 IS iscol = a->col,isrow = a->row; 863 PetscErrorCode ierr; 864 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 865 PetscInt nz,neq; 866 const PetscInt *rout,*cout,*r,*c; 867 PetscScalar *x,*b,*tmp,*tmps,sum; 868 const MatScalar *aa = a->a,*v; 869 PetscTruth bisdense,xisdense; 870 871 PetscFunctionBegin; 872 if (!n) PetscFunctionReturn(0); 873 874 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 875 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 876 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 877 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 878 879 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 880 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 881 882 tmp = a->solve_work; 883 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 884 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 885 886 for (neq=0; neq<B->cmap->n; neq++){ 887 /* forward solve the lower triangular */ 888 tmp[0] = b[r[0]]; 889 tmps = tmp; 890 for (i=1; i<n; i++) { 891 v = aa + ai[i] ; 892 vi = aj + ai[i] ; 893 nz = a->diag[i] - ai[i]; 894 sum = b[r[i]]; 895 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 896 tmp[i] = sum; 897 } 898 /* backward solve the upper triangular */ 899 for (i=n-1; i>=0; i--){ 900 v = aa + a->diag[i] + 1; 901 vi = aj + a->diag[i] + 1; 902 nz = ai[i+1] - a->diag[i] - 1; 903 sum = tmp[i]; 904 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 905 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 906 } 907 908 b += n; 909 x += n; 910 } 911 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 912 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 913 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 914 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 915 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 916 PetscFunctionReturn(0); 917 } 918 919 #undef __FUNCT__ 920 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 921 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 922 { 923 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 924 IS iscol = a->col,isrow = a->row; 925 PetscErrorCode ierr; 926 const PetscInt *r,*c,*rout,*cout; 927 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 928 PetscInt nz,row; 929 PetscScalar *x,*b,*tmp,*tmps,sum; 930 const MatScalar *aa = a->a,*v; 931 932 PetscFunctionBegin; 933 if (!n) PetscFunctionReturn(0); 934 935 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 936 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 937 tmp = a->solve_work; 938 939 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 940 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 941 942 /* forward solve the lower triangular */ 943 tmp[0] = b[*r++]; 944 tmps = tmp; 945 for (row=1; row<n; row++) { 946 i = rout[row]; /* permuted row */ 947 v = aa + ai[i] ; 948 vi = aj + ai[i] ; 949 nz = a->diag[i] - ai[i]; 950 sum = b[*r++]; 951 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 952 tmp[row] = sum; 953 } 954 955 /* backward solve the upper triangular */ 956 for (row=n-1; row>=0; row--){ 957 i = rout[row]; /* permuted row */ 958 v = aa + a->diag[i] + 1; 959 vi = aj + a->diag[i] + 1; 960 nz = ai[i+1] - a->diag[i] - 1; 961 sum = tmp[row]; 962 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 963 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 964 } 965 966 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 967 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 968 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 969 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 970 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 971 PetscFunctionReturn(0); 972 } 973 974 /* ----------------------------------------------------------- */ 975 #undef __FUNCT__ 976 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 977 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 978 { 979 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 980 PetscErrorCode ierr; 981 PetscInt n = A->rmap->n; 982 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 983 PetscScalar *x; 984 const PetscScalar *b; 985 const MatScalar *aa = a->a; 986 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 987 PetscInt adiag_i,i,nz,ai_i; 988 const MatScalar *v; 989 PetscScalar sum; 990 #endif 991 992 PetscFunctionBegin; 993 if (!n) PetscFunctionReturn(0); 994 995 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 996 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 997 998 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 999 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1000 #else 1001 /* forward solve the lower triangular */ 1002 x[0] = b[0]; 1003 for (i=1; i<n; i++) { 1004 ai_i = ai[i]; 1005 v = aa + ai_i; 1006 vi = aj + ai_i; 1007 nz = adiag[i] - ai_i; 1008 sum = b[i]; 1009 while (nz--) sum -= *v++ * x[*vi++]; 1010 x[i] = sum; 1011 } 1012 1013 /* backward solve the upper triangular */ 1014 for (i=n-1; i>=0; i--){ 1015 adiag_i = adiag[i]; 1016 v = aa + adiag_i + 1; 1017 vi = aj + adiag_i + 1; 1018 nz = ai[i+1] - adiag_i - 1; 1019 sum = x[i]; 1020 while (nz--) sum -= *v++ * x[*vi++]; 1021 x[i] = sum*aa[adiag_i]; 1022 } 1023 #endif 1024 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1025 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1026 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1027 PetscFunctionReturn(0); 1028 } 1029 1030 #undef __FUNCT__ 1031 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1032 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1033 { 1034 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1035 IS iscol = a->col,isrow = a->row; 1036 PetscErrorCode ierr; 1037 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1038 PetscInt nz; 1039 const PetscInt *rout,*cout,*r,*c; 1040 PetscScalar *x,*b,*tmp,sum; 1041 const MatScalar *aa = a->a,*v; 1042 1043 PetscFunctionBegin; 1044 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1045 1046 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1047 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1048 tmp = a->solve_work; 1049 1050 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1051 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1052 1053 /* forward solve the lower triangular */ 1054 tmp[0] = b[*r++]; 1055 for (i=1; i<n; i++) { 1056 v = aa + ai[i] ; 1057 vi = aj + ai[i] ; 1058 nz = a->diag[i] - ai[i]; 1059 sum = b[*r++]; 1060 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1061 tmp[i] = sum; 1062 } 1063 1064 /* backward solve the upper triangular */ 1065 for (i=n-1; i>=0; i--){ 1066 v = aa + a->diag[i] + 1; 1067 vi = aj + a->diag[i] + 1; 1068 nz = ai[i+1] - a->diag[i] - 1; 1069 sum = tmp[i]; 1070 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1071 tmp[i] = sum*aa[a->diag[i]]; 1072 x[*c--] += tmp[i]; 1073 } 1074 1075 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1076 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1077 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1078 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1079 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1080 1081 PetscFunctionReturn(0); 1082 } 1083 /* -------------------------------------------------------------------*/ 1084 #undef __FUNCT__ 1085 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1086 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1087 { 1088 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1089 IS iscol = a->col,isrow = a->row; 1090 PetscErrorCode ierr; 1091 const PetscInt *rout,*cout,*r,*c; 1092 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1093 PetscInt nz,*diag = a->diag; 1094 PetscScalar *x,*b,*tmp,s1; 1095 const MatScalar *aa = a->a,*v; 1096 1097 PetscFunctionBegin; 1098 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1099 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1100 tmp = a->solve_work; 1101 1102 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1103 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1104 1105 /* copy the b into temp work space according to permutation */ 1106 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1107 1108 /* forward solve the U^T */ 1109 for (i=0; i<n; i++) { 1110 v = aa + diag[i] ; 1111 vi = aj + diag[i] + 1; 1112 nz = ai[i+1] - diag[i] - 1; 1113 s1 = tmp[i]; 1114 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1115 while (nz--) { 1116 tmp[*vi++ ] -= (*v++)*s1; 1117 } 1118 tmp[i] = s1; 1119 } 1120 1121 /* backward solve the L^T */ 1122 for (i=n-1; i>=0; i--){ 1123 v = aa + diag[i] - 1 ; 1124 vi = aj + diag[i] - 1 ; 1125 nz = diag[i] - ai[i]; 1126 s1 = tmp[i]; 1127 while (nz--) { 1128 tmp[*vi-- ] -= (*v--)*s1; 1129 } 1130 } 1131 1132 /* copy tmp into x according to permutation */ 1133 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1134 1135 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1136 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1138 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1139 1140 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 #undef __FUNCT__ 1145 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1146 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1147 { 1148 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1149 IS iscol = a->col,isrow = a->row; 1150 PetscErrorCode ierr; 1151 const PetscInt *r,*c,*rout,*cout; 1152 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1153 PetscInt nz,*diag = a->diag; 1154 PetscScalar *x,*b,*tmp; 1155 const MatScalar *aa = a->a,*v; 1156 1157 PetscFunctionBegin; 1158 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1159 1160 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1161 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1162 tmp = a->solve_work; 1163 1164 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1165 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1166 1167 /* copy the b into temp work space according to permutation */ 1168 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1169 1170 /* forward solve the U^T */ 1171 for (i=0; i<n; i++) { 1172 v = aa + diag[i] ; 1173 vi = aj + diag[i] + 1; 1174 nz = ai[i+1] - diag[i] - 1; 1175 tmp[i] *= *v++; 1176 while (nz--) { 1177 tmp[*vi++ ] -= (*v++)*tmp[i]; 1178 } 1179 } 1180 1181 /* backward solve the L^T */ 1182 for (i=n-1; i>=0; i--){ 1183 v = aa + diag[i] - 1 ; 1184 vi = aj + diag[i] - 1 ; 1185 nz = diag[i] - ai[i]; 1186 while (nz--) { 1187 tmp[*vi-- ] -= (*v--)*tmp[i]; 1188 } 1189 } 1190 1191 /* copy tmp into x according to permutation */ 1192 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1193 1194 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1195 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1196 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1197 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1198 1199 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 /* ----------------------------------------------------------------*/ 1203 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1204 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 1205 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1208 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1209 { 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1211 IS isicol; 1212 PetscErrorCode ierr; 1213 const PetscInt *r,*ic; 1214 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1215 PetscInt *bi,*cols,nnz,*cols_lvl; 1216 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1217 PetscInt i,levels,diagonal_fill; 1218 PetscTruth col_identity,row_identity; 1219 PetscReal f; 1220 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1221 PetscBT lnkbt; 1222 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1223 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1224 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1225 PetscTruth missing; 1226 1227 PetscFunctionBegin; 1228 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1229 f = info->fill; 1230 levels = (PetscInt)info->levels; 1231 diagonal_fill = (PetscInt)info->diagonal_fill; 1232 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1233 1234 /* special case that simply copies fill pattern */ 1235 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1236 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1237 if (!levels && row_identity && col_identity) { 1238 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 1239 fact->factor = MAT_FACTOR_ILU; 1240 (fact)->info.factor_mallocs = 0; 1241 (fact)->info.fill_ratio_given = info->fill; 1242 (fact)->info.fill_ratio_needed = 1.0; 1243 b = (Mat_SeqAIJ*)(fact)->data; 1244 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1245 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1246 b->row = isrow; 1247 b->col = iscol; 1248 b->icol = isicol; 1249 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1250 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1251 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1252 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1253 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1254 PetscFunctionReturn(0); 1255 } 1256 1257 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1258 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1259 1260 /* get new row pointers */ 1261 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1262 bi[0] = 0; 1263 /* bdiag is location of diagonal in factor */ 1264 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1265 bdiag[0] = 0; 1266 1267 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1268 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1269 1270 /* create a linked list for storing column indices of the active row */ 1271 nlnk = n + 1; 1272 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1273 1274 /* initial FreeSpace size is f*(ai[n]+1) */ 1275 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1276 current_space = free_space; 1277 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1278 current_space_lvl = free_space_lvl; 1279 1280 for (i=0; i<n; i++) { 1281 nzi = 0; 1282 /* copy current row into linked list */ 1283 nnz = ai[r[i]+1] - ai[r[i]]; 1284 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1285 cols = aj + ai[r[i]]; 1286 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1287 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1288 nzi += nlnk; 1289 1290 /* make sure diagonal entry is included */ 1291 if (diagonal_fill && lnk[i] == -1) { 1292 fm = n; 1293 while (lnk[fm] < i) fm = lnk[fm]; 1294 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1295 lnk[fm] = i; 1296 lnk_lvl[i] = 0; 1297 nzi++; dcount++; 1298 } 1299 1300 /* add pivot rows into the active row */ 1301 nzbd = 0; 1302 prow = lnk[n]; 1303 while (prow < i) { 1304 nnz = bdiag[prow]; 1305 cols = bj_ptr[prow] + nnz + 1; 1306 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1307 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1308 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1309 nzi += nlnk; 1310 prow = lnk[prow]; 1311 nzbd++; 1312 } 1313 bdiag[i] = nzbd; 1314 bi[i+1] = bi[i] + nzi; 1315 1316 /* if free space is not available, make more free space */ 1317 if (current_space->local_remaining<nzi) { 1318 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1319 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1320 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1321 reallocs++; 1322 } 1323 1324 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1325 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1326 bj_ptr[i] = current_space->array; 1327 bjlvl_ptr[i] = current_space_lvl->array; 1328 1329 /* make sure the active row i has diagonal entry */ 1330 if (*(bj_ptr[i]+bdiag[i]) != i) { 1331 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1332 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1333 } 1334 1335 current_space->array += nzi; 1336 current_space->local_used += nzi; 1337 current_space->local_remaining -= nzi; 1338 current_space_lvl->array += nzi; 1339 current_space_lvl->local_used += nzi; 1340 current_space_lvl->local_remaining -= nzi; 1341 } 1342 1343 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1344 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1345 1346 /* destroy list of free space and other temporary arrays */ 1347 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1348 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1349 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1350 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1351 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1352 1353 #if defined(PETSC_USE_INFO) 1354 { 1355 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1356 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1357 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1358 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1359 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1360 if (diagonal_fill) { 1361 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1362 } 1363 } 1364 #endif 1365 1366 /* put together the new matrix */ 1367 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1368 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1369 b = (Mat_SeqAIJ*)(fact)->data; 1370 b->free_a = PETSC_TRUE; 1371 b->free_ij = PETSC_TRUE; 1372 b->singlemalloc = PETSC_FALSE; 1373 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1374 b->j = bj; 1375 b->i = bi; 1376 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1377 b->diag = bdiag; 1378 b->ilen = 0; 1379 b->imax = 0; 1380 b->row = isrow; 1381 b->col = iscol; 1382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1384 b->icol = isicol; 1385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1386 /* In b structure: Free imax, ilen, old a, old j. 1387 Allocate bdiag, solve_work, new a, new j */ 1388 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1389 b->maxnz = b->nz = bi[n] ; 1390 (fact)->info.factor_mallocs = reallocs; 1391 (fact)->info.fill_ratio_given = f; 1392 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1393 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1394 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1395 PetscFunctionReturn(0); 1396 } 1397 1398 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1399 #undef __FUNCT__ 1400 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1401 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1402 { 1403 Mat C = B; 1404 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1405 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1406 IS ip=b->row,iip = b->icol; 1407 PetscErrorCode ierr; 1408 const PetscInt *rip,*riip; 1409 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1410 PetscInt *ai=a->i,*aj=a->j; 1411 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1412 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1413 PetscReal zeropivot,rs,shiftnz; 1414 PetscReal shiftpd; 1415 ChShift_Ctx sctx; 1416 PetscInt newshift; 1417 PetscTruth perm_identity; 1418 1419 PetscFunctionBegin; 1420 1421 shiftnz = info->shiftnz; 1422 shiftpd = info->shiftpd; 1423 zeropivot = info->zeropivot; 1424 1425 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1426 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1427 1428 /* initialization */ 1429 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1430 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1431 jl = il + mbs; 1432 rtmp = (MatScalar*)(jl + mbs); 1433 1434 sctx.shift_amount = 0; 1435 sctx.nshift = 0; 1436 do { 1437 sctx.chshift = PETSC_FALSE; 1438 for (i=0; i<mbs; i++) { 1439 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1440 } 1441 1442 for (k = 0; k<mbs; k++){ 1443 bval = ba + bi[k]; 1444 /* initialize k-th row by the perm[k]-th row of A */ 1445 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1446 for (j = jmin; j < jmax; j++){ 1447 col = riip[aj[j]]; 1448 if (col >= k){ /* only take upper triangular entry */ 1449 rtmp[col] = aa[j]; 1450 *bval++ = 0.0; /* for in-place factorization */ 1451 } 1452 } 1453 /* shift the diagonal of the matrix */ 1454 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1455 1456 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1457 dk = rtmp[k]; 1458 i = jl[k]; /* first row to be added to k_th row */ 1459 1460 while (i < k){ 1461 nexti = jl[i]; /* next row to be added to k_th row */ 1462 1463 /* compute multiplier, update diag(k) and U(i,k) */ 1464 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1465 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1466 dk += uikdi*ba[ili]; 1467 ba[ili] = uikdi; /* -U(i,k) */ 1468 1469 /* add multiple of row i to k-th row */ 1470 jmin = ili + 1; jmax = bi[i+1]; 1471 if (jmin < jmax){ 1472 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1473 /* update il and jl for row i */ 1474 il[i] = jmin; 1475 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1476 } 1477 i = nexti; 1478 } 1479 1480 /* shift the diagonals when zero pivot is detected */ 1481 /* compute rs=sum of abs(off-diagonal) */ 1482 rs = 0.0; 1483 jmin = bi[k]+1; 1484 nz = bi[k+1] - jmin; 1485 bcol = bj + jmin; 1486 while (nz--){ 1487 rs += PetscAbsScalar(rtmp[*bcol]); 1488 bcol++; 1489 } 1490 1491 sctx.rs = rs; 1492 sctx.pv = dk; 1493 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1494 1495 if (newshift == 1) { 1496 if (!sctx.shift_amount) { 1497 sctx.shift_amount = 1e-5; 1498 } 1499 break; 1500 } 1501 1502 /* copy data into U(k,:) */ 1503 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1504 jmin = bi[k]+1; jmax = bi[k+1]; 1505 if (jmin < jmax) { 1506 for (j=jmin; j<jmax; j++){ 1507 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1508 } 1509 /* add the k-th row into il and jl */ 1510 il[k] = jmin; 1511 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1512 } 1513 } 1514 } while (sctx.chshift); 1515 ierr = PetscFree(il);CHKERRQ(ierr); 1516 1517 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1518 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1519 1520 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1521 if (perm_identity){ 1522 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1523 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1524 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1525 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1526 } else { 1527 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1528 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1529 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1530 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1531 } 1532 1533 C->assembled = PETSC_TRUE; 1534 C->preallocated = PETSC_TRUE; 1535 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1536 if (sctx.nshift){ 1537 if (shiftnz) { 1538 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1539 } else if (shiftpd) { 1540 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1541 } 1542 } 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1548 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1549 { 1550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1551 Mat_SeqSBAIJ *b; 1552 PetscErrorCode ierr; 1553 PetscTruth perm_identity,missing; 1554 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1555 const PetscInt *rip,*riip; 1556 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1557 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1558 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1559 PetscReal fill=info->fill,levels=info->levels; 1560 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1561 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1562 PetscBT lnkbt; 1563 IS iperm; 1564 1565 PetscFunctionBegin; 1566 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1567 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1568 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1569 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1570 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1571 1572 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1573 ui[0] = 0; 1574 1575 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1576 if (!levels && perm_identity) { 1577 1578 for (i=0; i<am; i++) { 1579 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1580 } 1581 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1582 cols = uj; 1583 for (i=0; i<am; i++) { 1584 aj = a->j + a->diag[i]; 1585 ncols = ui[i+1] - ui[i]; 1586 for (j=0; j<ncols; j++) *cols++ = *aj++; 1587 } 1588 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1589 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1590 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1591 1592 /* initialization */ 1593 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1594 1595 /* jl: linked list for storing indices of the pivot rows 1596 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1597 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1598 il = jl + am; 1599 uj_ptr = (PetscInt**)(il + am); 1600 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1601 for (i=0; i<am; i++){ 1602 jl[i] = am; il[i] = 0; 1603 } 1604 1605 /* create and initialize a linked list for storing column indices of the active row k */ 1606 nlnk = am + 1; 1607 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1608 1609 /* initial FreeSpace size is fill*(ai[am]+1) */ 1610 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1611 current_space = free_space; 1612 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1613 current_space_lvl = free_space_lvl; 1614 1615 for (k=0; k<am; k++){ /* for each active row k */ 1616 /* initialize lnk by the column indices of row rip[k] of A */ 1617 nzk = 0; 1618 ncols = ai[rip[k]+1] - ai[rip[k]]; 1619 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1620 ncols_upper = 0; 1621 for (j=0; j<ncols; j++){ 1622 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1623 if (riip[i] >= k){ /* only take upper triangular entry */ 1624 ajtmp[ncols_upper] = i; 1625 ncols_upper++; 1626 } 1627 } 1628 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1629 nzk += nlnk; 1630 1631 /* update lnk by computing fill-in for each pivot row to be merged in */ 1632 prow = jl[k]; /* 1st pivot row */ 1633 1634 while (prow < k){ 1635 nextprow = jl[prow]; 1636 1637 /* merge prow into k-th row */ 1638 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1639 jmax = ui[prow+1]; 1640 ncols = jmax-jmin; 1641 i = jmin - ui[prow]; 1642 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1643 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1644 j = *(uj - 1); 1645 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1646 nzk += nlnk; 1647 1648 /* update il and jl for prow */ 1649 if (jmin < jmax){ 1650 il[prow] = jmin; 1651 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1652 } 1653 prow = nextprow; 1654 } 1655 1656 /* if free space is not available, make more free space */ 1657 if (current_space->local_remaining<nzk) { 1658 i = am - k + 1; /* num of unfactored rows */ 1659 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1660 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1661 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1662 reallocs++; 1663 } 1664 1665 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1666 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1667 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1668 1669 /* add the k-th row into il and jl */ 1670 if (nzk > 1){ 1671 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1672 jl[k] = jl[i]; jl[i] = k; 1673 il[k] = ui[k] + 1; 1674 } 1675 uj_ptr[k] = current_space->array; 1676 uj_lvl_ptr[k] = current_space_lvl->array; 1677 1678 current_space->array += nzk; 1679 current_space->local_used += nzk; 1680 current_space->local_remaining -= nzk; 1681 1682 current_space_lvl->array += nzk; 1683 current_space_lvl->local_used += nzk; 1684 current_space_lvl->local_remaining -= nzk; 1685 1686 ui[k+1] = ui[k] + nzk; 1687 } 1688 1689 #if defined(PETSC_USE_INFO) 1690 if (ai[am] != 0) { 1691 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1692 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1693 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1694 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1695 } else { 1696 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1697 } 1698 #endif 1699 1700 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1701 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1702 ierr = PetscFree(jl);CHKERRQ(ierr); 1703 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1704 1705 /* destroy list of free space and other temporary array(s) */ 1706 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1707 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1708 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1709 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1710 1711 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1712 1713 /* put together the new matrix in MATSEQSBAIJ format */ 1714 1715 b = (Mat_SeqSBAIJ*)(fact)->data; 1716 b->singlemalloc = PETSC_FALSE; 1717 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1718 b->j = uj; 1719 b->i = ui; 1720 b->diag = 0; 1721 b->ilen = 0; 1722 b->imax = 0; 1723 b->row = perm; 1724 b->col = perm; 1725 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1726 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1727 b->icol = iperm; 1728 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1729 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1730 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1731 b->maxnz = b->nz = ui[am]; 1732 b->free_a = PETSC_TRUE; 1733 b->free_ij = PETSC_TRUE; 1734 1735 (fact)->info.factor_mallocs = reallocs; 1736 (fact)->info.fill_ratio_given = fill; 1737 if (ai[am] != 0) { 1738 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1739 } else { 1740 (fact)->info.fill_ratio_needed = 0.0; 1741 } 1742 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1743 PetscFunctionReturn(0); 1744 } 1745 1746 #undef __FUNCT__ 1747 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1748 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1749 { 1750 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1751 Mat_SeqSBAIJ *b; 1752 PetscErrorCode ierr; 1753 PetscTruth perm_identity; 1754 PetscReal fill = info->fill; 1755 const PetscInt *rip,*riip; 1756 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1757 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1758 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1759 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1760 PetscBT lnkbt; 1761 IS iperm; 1762 1763 PetscFunctionBegin; 1764 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1765 /* check whether perm is the identity mapping */ 1766 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1767 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1768 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1769 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1770 1771 /* initialization */ 1772 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1773 ui[0] = 0; 1774 1775 /* jl: linked list for storing indices of the pivot rows 1776 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1777 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1778 il = jl + am; 1779 cols = il + am; 1780 ui_ptr = (PetscInt**)(cols + am); 1781 for (i=0; i<am; i++){ 1782 jl[i] = am; il[i] = 0; 1783 } 1784 1785 /* create and initialize a linked list for storing column indices of the active row k */ 1786 nlnk = am + 1; 1787 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1788 1789 /* initial FreeSpace size is fill*(ai[am]+1) */ 1790 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1791 current_space = free_space; 1792 1793 for (k=0; k<am; k++){ /* for each active row k */ 1794 /* initialize lnk by the column indices of row rip[k] of A */ 1795 nzk = 0; 1796 ncols = ai[rip[k]+1] - ai[rip[k]]; 1797 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1798 ncols_upper = 0; 1799 for (j=0; j<ncols; j++){ 1800 i = riip[*(aj + ai[rip[k]] + j)]; 1801 if (i >= k){ /* only take upper triangular entry */ 1802 cols[ncols_upper] = i; 1803 ncols_upper++; 1804 } 1805 } 1806 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1807 nzk += nlnk; 1808 1809 /* update lnk by computing fill-in for each pivot row to be merged in */ 1810 prow = jl[k]; /* 1st pivot row */ 1811 1812 while (prow < k){ 1813 nextprow = jl[prow]; 1814 /* merge prow into k-th row */ 1815 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1816 jmax = ui[prow+1]; 1817 ncols = jmax-jmin; 1818 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1819 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1820 nzk += nlnk; 1821 1822 /* update il and jl for prow */ 1823 if (jmin < jmax){ 1824 il[prow] = jmin; 1825 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1826 } 1827 prow = nextprow; 1828 } 1829 1830 /* if free space is not available, make more free space */ 1831 if (current_space->local_remaining<nzk) { 1832 i = am - k + 1; /* num of unfactored rows */ 1833 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1834 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1835 reallocs++; 1836 } 1837 1838 /* copy data into free space, then initialize lnk */ 1839 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1840 1841 /* add the k-th row into il and jl */ 1842 if (nzk-1 > 0){ 1843 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1844 jl[k] = jl[i]; jl[i] = k; 1845 il[k] = ui[k] + 1; 1846 } 1847 ui_ptr[k] = current_space->array; 1848 current_space->array += nzk; 1849 current_space->local_used += nzk; 1850 current_space->local_remaining -= nzk; 1851 1852 ui[k+1] = ui[k] + nzk; 1853 } 1854 1855 #if defined(PETSC_USE_INFO) 1856 if (ai[am] != 0) { 1857 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1858 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1859 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1860 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1861 } else { 1862 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1863 } 1864 #endif 1865 1866 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1867 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1868 ierr = PetscFree(jl);CHKERRQ(ierr); 1869 1870 /* destroy list of free space and other temporary array(s) */ 1871 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1872 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1873 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1874 1875 /* put together the new matrix in MATSEQSBAIJ format */ 1876 1877 b = (Mat_SeqSBAIJ*)(fact)->data; 1878 b->singlemalloc = PETSC_FALSE; 1879 b->free_a = PETSC_TRUE; 1880 b->free_ij = PETSC_TRUE; 1881 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1882 b->j = uj; 1883 b->i = ui; 1884 b->diag = 0; 1885 b->ilen = 0; 1886 b->imax = 0; 1887 b->row = perm; 1888 b->col = perm; 1889 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1890 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1891 b->icol = iperm; 1892 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1893 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1894 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1895 b->maxnz = b->nz = ui[am]; 1896 1897 (fact)->info.factor_mallocs = reallocs; 1898 (fact)->info.fill_ratio_given = fill; 1899 if (ai[am] != 0) { 1900 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1901 } else { 1902 (fact)->info.fill_ratio_needed = 0.0; 1903 } 1904 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1905 PetscFunctionReturn(0); 1906 } 1907 1908 #undef __FUNCT__ 1909 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 1910 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1911 { 1912 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1913 IS isicol; 1914 PetscErrorCode ierr; 1915 PetscInt d,n=A->rmap->n,*ai=a->i; 1916 PetscInt *bi,*bj,*bdiag,nnz,dtcount; 1917 PetscScalar *ba; 1918 PetscTruth missing; 1919 1920 PetscFunctionBegin; 1921 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 1922 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1923 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1924 1925 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1926 1927 /* get new row pointers */ 1928 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1929 1930 /* bdiag is location of diagonal in factor */ 1931 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1932 1933 /* max num of nonzero entries is (ai[n]+2*n*dtcount+1) */ 1934 dtcount = (PetscInt)info->dtcount; 1935 nnz = ai[n]+2*n*dtcount+1; 1936 ierr = PetscMalloc2(nnz,PetscInt,&bj,nnz,PetscScalar,&ba);CHKERRQ(ierr); 1937 /* printf("nnz %d\n",nnz); */ 1938 1939 /* put together the new matrix */ 1940 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1941 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 1942 b = (Mat_SeqAIJ*)(B)->data; 1943 b->free_a = PETSC_TRUE; 1944 b->free_ij = PETSC_TRUE; 1945 b->singlemalloc = PETSC_FALSE; 1946 b->a = ba; 1947 b->j = bj; 1948 b->i = bi; 1949 b->diag = bdiag; 1950 b->ilen = 0; 1951 b->imax = 0; 1952 b->row = isrow; 1953 b->col = iscol; 1954 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1955 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1956 b->icol = isicol; 1957 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1958 1959 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1960 1961 b->maxnz = b->nz = nnz; 1962 1963 (B)->factor = MAT_FACTOR_ILUDT; 1964 (B)->info.factor_mallocs = 0; 1965 (B)->info.fill_ratio_given = ((PetscReal)nnz)/((PetscReal)ai[n]); 1966 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 1967 PetscFunctionReturn(0); 1968 } 1969 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 1972 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1973 { 1974 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data; 1975 IS isrow=b->row,isicol=b->icol; 1976 PetscErrorCode ierr; 1977 const PetscInt *r,*ic; 1978 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag=a->diag; 1979 PetscInt *bi=b->i,*bj=b->j,*bdiag=b->diag; 1980 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1981 PetscInt nlnk,*lnk; 1982 PetscBT lnkbt; 1983 PetscTruth row_identity,icol_identity,both_identity; 1984 MatScalar *v,*pv,*batmp,*ba=b->a,*rtmp,*pc,multiplier,*vtmp; 1985 const PetscInt *ics; 1986 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1987 PetscReal dt=info->dt; 1988 1989 PetscFunctionBegin; 1990 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1991 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1992 ics = ic; 1993 1994 /* linked list for storing column indices of the active row */ 1995 nlnk = n + 1; 1996 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1997 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 1998 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1999 vtmp = rtmp + n; 2000 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&jtmp);CHKERRQ(ierr); 2001 2002 dtcount = (PetscInt)info->dtcount; 2003 2004 bi[0] = 0; 2005 bdiag[0] = 0; /* location of diagonal in factor B */ 2006 for (i=0; i<n; i++) { 2007 /* copy initial fill into linked list */ 2008 nzi = 0; /* nonzeros for active row i */ 2009 nzi = ai[r[i]+1] - ai[r[i]]; 2010 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 2011 nzi_al = adiag[r[i]] - ai[r[i]]; 2012 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 2013 ajtmp = aj + ai[r[i]]; 2014 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2015 2016 /* load in initial (unfactored row) */ 2017 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2018 v = a->a + ai[r[i]]; 2019 for (j=0; j<nzi; j++) { 2020 rtmp[ics[ajtmp[j]]] = v[j]; 2021 } 2022 2023 /* add pivot rows into linked list */ 2024 row = lnk[n]; 2025 while (row < i) { 2026 nzi_bl = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 2027 ajtmp = bj + bi[row] + nzi_bl; /* points to the column next to the diagonal */ 2028 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 2029 nzi += nlnk; 2030 row = lnk[row]; 2031 } 2032 2033 /* copy data from lnk into jtmp, then initialize lnk */ 2034 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 2035 2036 /* numerical factorization */ 2037 bjtmp = jtmp; 2038 row = *bjtmp++; /* 1st pivot row */ 2039 while (row < i) { 2040 pc = rtmp + row; 2041 /* apply tolerance dropping rule */ 2042 if (PetscAbsScalar(*pc) > dt){ 2043 pv = ba + bdiag[row]; /* diag of the pivot row */ 2044 pj = bj + bdiag[row] + 1; 2045 multiplier = *pc / *pv++; 2046 *pc = multiplier; 2047 nz = bi[row+1] - bdiag[row] - 1; 2048 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 2049 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 2050 } 2051 row = *bjtmp++; 2052 } 2053 2054 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 2055 /* printf("row %d:\n",i); */ 2056 nzi_bl = 0; j = 0; 2057 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 2058 vtmp[j] = rtmp[jtmp[j]]; 2059 nzi_bl++; j++; 2060 } 2061 nzi_bu = nzi - nzi_bl -1; 2062 while (j < nzi){ 2063 vtmp[j] = rtmp[jtmp[j]]; 2064 j++; 2065 } 2066 2067 bjtmp = bj + bi[i]; 2068 batmp = ba + bi[i]; 2069 /* apply level dropping rule to L part */ 2070 ncut = nzi_al + dtcount; 2071 if (ncut < nzi_bl){ 2072 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 2073 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 2074 } else { 2075 ncut = nzi_bl; 2076 } 2077 for (j=0; j<ncut; j++){ 2078 bjtmp[j] = jtmp[j]; 2079 batmp[j] = vtmp[j]; 2080 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 2081 } 2082 2083 /* mark bdiagonal */ 2084 bdiag[i] = bi[i] + ncut; 2085 bjtmp[ncut] = i; 2086 batmp[ncut] = rtmp[i]; 2087 /* printf(" diag(%d,%g),",bjtmp[ncut],batmp[ncut]); */ 2088 j++; 2089 nzi = ncut + 1; 2090 2091 /* apply level dropping rule to U part */ 2092 ncut = nzi_au + dtcount; 2093 if (ncut < nzi_bu){ 2094 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 2095 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 2096 } else { 2097 ncut = nzi_bu; 2098 } 2099 nzi += ncut; 2100 /* printf(" \nU level dropping ..."); */ 2101 for (k=0; k<ncut; k++){ 2102 bjtmp[j] = jtmp[nzi_bl+1+k]; 2103 batmp[j] = vtmp[nzi_bl+1+k]; 2104 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 2105 j++; 2106 } 2107 2108 bi[i+1] = bi[i] + nzi; 2109 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 2110 /* 2111 printf("bi[%d] = %d\n",i+1,bi[i+1]); 2112 printf(" ----------------------------\n"); 2113 */ 2114 } /* for (i=0; i<n; i++) */ 2115 2116 /* invert diagonal entries for simplier triangular solves */ 2117 for (i=0; i<n; i++) { 2118 ba[bdiag[i]] = 1.0/ba[bdiag[i]]; 2119 } 2120 2121 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2122 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2123 2124 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2125 ierr = PetscFree(im);CHKERRQ(ierr); 2126 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2127 ierr = PetscFree(jtmp);CHKERRQ(ierr); 2128 2129 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 2130 2131 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2132 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 2133 both_identity = (PetscTruth) (row_identity && icol_identity); 2134 2135 if (row_identity && icol_identity) { 2136 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 2137 } else { 2138 B->ops->solve = MatSolve_SeqAIJ; 2139 } 2140 2141 B->ops->solveadd = MatSolveAdd_SeqAIJ; 2142 B->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 2143 B->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 2144 B->ops->matsolve = MatMatSolve_SeqAIJ; 2145 B->assembled = PETSC_TRUE; 2146 B->preallocated = PETSC_TRUE; 2147 PetscFunctionReturn(0); 2148 } 2149 2150