1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscScalar d; 443 PetscReal rs; 444 LUShift_Ctx sctx; 445 PetscInt newshift,*ddiag; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp; ics = ic; 453 454 sctx.shift_top = 0; 455 sctx.nshift_max = 0; 456 sctx.shift_lo = 0; 457 sctx.shift_hi = 0; 458 459 /* if both shift schemes are chosen by user, only use info->shiftpd */ 460 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 461 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 462 PetscInt *aai = a->i; 463 ddiag = a->diag; 464 sctx.shift_top = 0; 465 for (i=0; i<n; i++) { 466 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 467 d = (a->a)[ddiag[i]]; 468 rs = -PetscAbsScalar(d) - PetscRealPart(d); 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 rs += PetscAbsScalar(v[j]); 473 if (rs>sctx.shift_top) sctx.shift_top = rs; 474 } 475 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 sctx.shift_amount = 0; 483 sctx.nshift = 0; 484 do { 485 sctx.lushift = PETSC_FALSE; 486 for (i=0; i<n; i++){ 487 nz = bi[i+1] - bi[i]; 488 bjtmp = bj + bi[i]; 489 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = a->i[r[i]+1] - a->i[r[i]]; 493 ajtmp = a->j + a->i[r[i]]; 494 v = a->a + a->i[r[i]]; 495 for (j=0; j<nz; j++) { 496 rtmp[ics[ajtmp[j]]] = v[j]; 497 } 498 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 row = *bjtmp++; 501 while (row < i) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row]; 505 pj = b->j + diag_offset[row] + 1; 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = bi[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 511 } 512 row = *bjtmp++; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + bi[i] ; 516 pj = b->j + bi[i] ; 517 nz = bi[i+1] - bi[i]; 518 diag = diag_offset[i] - bi[i]; 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 526 sctx.rs = rs; 527 sctx.pv = pv[diag]; 528 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 529 if (newshift == 1) break; 530 } 531 532 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 557 if (sctx.nshift){ 558 if (info->shiftnz) { 559 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 560 } else if (info->shiftpd) { 561 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 This routine implements inplace ILU(0) with row or/and column permutations. 569 Input: 570 A - original matrix 571 Output; 572 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 573 a->j (col index) is permuted by the inverse of colperm, then sorted 574 a->a reordered accordingly with a->j 575 a->diag (ptr to diagonal elements) is updated. 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 579 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 580 { 581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 582 IS isrow = a->row,isicol = a->icol; 583 PetscErrorCode ierr; 584 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 585 PetscInt *ajtmp,nz,row,*ics; 586 PetscInt *diag = a->diag,nbdiag,*pj; 587 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 588 PetscReal rs; 589 LUShift_Ctx sctx; 590 PetscInt newshift; 591 592 PetscFunctionBegin; 593 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 594 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 595 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 597 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 598 ics = ic; 599 600 sctx.shift_top = 0; 601 sctx.nshift_max = 0; 602 sctx.shift_lo = 0; 603 sctx.shift_hi = 0; 604 605 /* if both shift schemes are chosen by user, only use info->shiftpd */ 606 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 607 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 608 sctx.shift_top = 0; 609 for (i=0; i<n; i++) { 610 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 611 d = (a->a)[diag[i]]; 612 rs = -PetscAbsScalar(d) - PetscRealPart(d); 613 v = a->a+ai[i]; 614 nz = ai[i+1] - ai[i]; 615 for (j=0; j<nz; j++) 616 rs += PetscAbsScalar(v[j]); 617 if (rs>sctx.shift_top) sctx.shift_top = rs; 618 } 619 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 620 sctx.shift_top *= 1.1; 621 sctx.nshift_max = 5; 622 sctx.shift_lo = 0.; 623 sctx.shift_hi = 1.; 624 } 625 626 sctx.shift_amount = 0; 627 sctx.nshift = 0; 628 do { 629 sctx.lushift = PETSC_FALSE; 630 for (i=0; i<n; i++){ 631 /* load in initial unfactored row */ 632 nz = ai[r[i]+1] - ai[r[i]]; 633 ajtmp = aj + ai[r[i]]; 634 v = a->a + ai[r[i]]; 635 /* sort permuted ajtmp and values v accordingly */ 636 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 637 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 638 639 diag[r[i]] = ai[r[i]]; 640 for (j=0; j<nz; j++) { 641 rtmp[ajtmp[j]] = v[j]; 642 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 643 } 644 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 645 646 row = *ajtmp++; 647 while (row < i) { 648 pc = rtmp + row; 649 if (*pc != 0.0) { 650 pv = a->a + diag[r[row]]; 651 pj = aj + diag[r[row]] + 1; 652 653 multiplier = *pc / *pv++; 654 *pc = multiplier; 655 nz = ai[r[row]+1] - diag[r[row]] - 1; 656 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 657 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 658 } 659 row = *ajtmp++; 660 } 661 /* finished row so overwrite it onto a->a */ 662 pv = a->a + ai[r[i]] ; 663 pj = aj + ai[r[i]] ; 664 nz = ai[r[i]+1] - ai[r[i]]; 665 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 666 667 rs = 0.0; 668 for (j=0; j<nz; j++) { 669 pv[j] = rtmp[pj[j]]; 670 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 671 } 672 673 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 674 sctx.rs = rs; 675 sctx.pv = pv[nbdiag]; 676 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 677 if (newshift == 1) break; 678 } 679 680 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 681 /* 682 * if no shift in this attempt & shifting & started shifting & can refine, 683 * then try lower shift 684 */ 685 sctx.shift_hi = info->shift_fraction; 686 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 687 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 688 sctx.lushift = PETSC_TRUE; 689 sctx.nshift++; 690 } 691 } while (sctx.lushift); 692 693 /* invert diagonal entries for simplier triangular solves */ 694 for (i=0; i<n; i++) { 695 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 696 } 697 698 ierr = PetscFree(rtmp);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 701 A->factor = FACTOR_LU; 702 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 703 A->assembled = PETSC_TRUE; 704 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 705 if (sctx.nshift){ 706 if (info->shiftnz) { 707 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 708 } else if (info->shiftpd) { 709 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 710 } 711 } 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 717 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 718 { 719 PetscFunctionBegin; 720 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 721 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 726 /* ----------------------------------------------------------- */ 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatLUFactor_SeqAIJ" 729 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 730 { 731 PetscErrorCode ierr; 732 Mat C; 733 734 PetscFunctionBegin; 735 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 736 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 737 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 /* ----------------------------------------------------------- */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatSolve_SeqAIJ" 744 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 745 { 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 747 IS iscol = a->col,isrow = a->row; 748 PetscErrorCode ierr; 749 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 750 PetscInt nz,*rout,*cout; 751 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 752 753 PetscFunctionBegin; 754 if (!n) PetscFunctionReturn(0); 755 756 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 757 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 758 tmp = a->solve_work; 759 760 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 761 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 762 763 /* forward solve the lower triangular */ 764 tmp[0] = b[*r++]; 765 tmps = tmp; 766 for (i=1; i<n; i++) { 767 v = aa + ai[i] ; 768 vi = aj + ai[i] ; 769 nz = a->diag[i] - ai[i]; 770 sum = b[*r++]; 771 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 772 tmp[i] = sum; 773 } 774 775 /* backward solve the upper triangular */ 776 for (i=n-1; i>=0; i--){ 777 v = aa + a->diag[i] + 1; 778 vi = aj + a->diag[i] + 1; 779 nz = ai[i+1] - a->diag[i] - 1; 780 sum = tmp[i]; 781 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 782 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 783 } 784 785 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 786 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 787 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatMatSolve_SeqAIJ" 795 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,neq; 802 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 803 804 PetscFunctionBegin; 805 if (!n) PetscFunctionReturn(0); 806 807 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 808 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 809 810 tmp = a->solve_work; 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 for (neq=0; neq<n; neq++){ 815 /* forward solve the lower triangular */ 816 tmp[0] = b[r[0]]; 817 tmps = tmp; 818 for (i=1; i<n; i++) { 819 v = aa + ai[i] ; 820 vi = aj + ai[i] ; 821 nz = a->diag[i] - ai[i]; 822 sum = b[r[i]]; 823 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 824 tmp[i] = sum; 825 } 826 /* backward solve the upper triangular */ 827 for (i=n-1; i>=0; i--){ 828 v = aa + a->diag[i] + 1; 829 vi = aj + a->diag[i] + 1; 830 nz = ai[i+1] - a->diag[i] - 1; 831 sum = tmp[i]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 834 } 835 836 b += n; 837 x += n; 838 } 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 842 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 843 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 849 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 852 IS iscol = a->col,isrow = a->row; 853 PetscErrorCode ierr; 854 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 855 PetscInt nz,*rout,*cout,row; 856 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 857 858 PetscFunctionBegin; 859 if (!n) PetscFunctionReturn(0); 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 867 868 /* forward solve the lower triangular */ 869 tmp[0] = b[*r++]; 870 tmps = tmp; 871 for (row=1; row<n; row++) { 872 i = rout[row]; /* permuted row */ 873 v = aa + ai[i] ; 874 vi = aj + ai[i] ; 875 nz = a->diag[i] - ai[i]; 876 sum = b[*r++]; 877 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 878 tmp[row] = sum; 879 } 880 881 /* backward solve the upper triangular */ 882 for (row=n-1; row>=0; row--){ 883 i = rout[row]; /* permuted row */ 884 v = aa + a->diag[i] + 1; 885 vi = aj + a->diag[i] + 1; 886 nz = ai[i+1] - a->diag[i] - 1; 887 sum = tmp[row]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 890 } 891 892 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 894 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 /* ----------------------------------------------------------- */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 908 PetscScalar *x,*b,*aa = a->a; 909 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 910 PetscInt adiag_i,i,*vi,nz,ai_i; 911 PetscScalar *v,sum; 912 #endif 913 914 PetscFunctionBegin; 915 if (!n) PetscFunctionReturn(0); 916 917 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 920 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 921 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 922 #else 923 /* forward solve the lower triangular */ 924 x[0] = b[0]; 925 for (i=1; i<n; i++) { 926 ai_i = ai[i]; 927 v = aa + ai_i; 928 vi = aj + ai_i; 929 nz = adiag[i] - ai_i; 930 sum = b[i]; 931 while (nz--) sum -= *v++ * x[*vi++]; 932 x[i] = sum; 933 } 934 935 /* backward solve the upper triangular */ 936 for (i=n-1; i>=0; i--){ 937 adiag_i = adiag[i]; 938 v = aa + adiag_i + 1; 939 vi = aj + adiag_i + 1; 940 nz = ai[i+1] - adiag_i - 1; 941 sum = x[i]; 942 while (nz--) sum -= *v++ * x[*vi++]; 943 x[i] = sum*aa[adiag_i]; 944 } 945 #endif 946 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 954 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 955 { 956 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 957 IS iscol = a->col,isrow = a->row; 958 PetscErrorCode ierr; 959 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 960 PetscInt nz,*rout,*cout; 961 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 962 963 PetscFunctionBegin; 964 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 965 966 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 968 tmp = a->solve_work; 969 970 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 971 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 972 973 /* forward solve the lower triangular */ 974 tmp[0] = b[*r++]; 975 for (i=1; i<n; i++) { 976 v = aa + ai[i] ; 977 vi = aj + ai[i] ; 978 nz = a->diag[i] - ai[i]; 979 sum = b[*r++]; 980 while (nz--) sum -= *v++ * tmp[*vi++ ]; 981 tmp[i] = sum; 982 } 983 984 /* backward solve the upper triangular */ 985 for (i=n-1; i>=0; i--){ 986 v = aa + a->diag[i] + 1; 987 vi = aj + a->diag[i] + 1; 988 nz = ai[i+1] - a->diag[i] - 1; 989 sum = tmp[i]; 990 while (nz--) sum -= *v++ * tmp[*vi++ ]; 991 tmp[i] = sum*aa[a->diag[i]]; 992 x[*c--] += tmp[i]; 993 } 994 995 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 997 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1000 1001 PetscFunctionReturn(0); 1002 } 1003 /* -------------------------------------------------------------------*/ 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1006 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1007 { 1008 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1009 IS iscol = a->col,isrow = a->row; 1010 PetscErrorCode ierr; 1011 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1012 PetscInt nz,*rout,*cout,*diag = a->diag; 1013 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1014 1015 PetscFunctionBegin; 1016 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1018 tmp = a->solve_work; 1019 1020 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1021 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1022 1023 /* copy the b into temp work space according to permutation */ 1024 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1025 1026 /* forward solve the U^T */ 1027 for (i=0; i<n; i++) { 1028 v = aa + diag[i] ; 1029 vi = aj + diag[i] + 1; 1030 nz = ai[i+1] - diag[i] - 1; 1031 s1 = tmp[i]; 1032 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1033 while (nz--) { 1034 tmp[*vi++ ] -= (*v++)*s1; 1035 } 1036 tmp[i] = s1; 1037 } 1038 1039 /* backward solve the L^T */ 1040 for (i=n-1; i>=0; i--){ 1041 v = aa + diag[i] - 1 ; 1042 vi = aj + diag[i] - 1 ; 1043 nz = diag[i] - ai[i]; 1044 s1 = tmp[i]; 1045 while (nz--) { 1046 tmp[*vi-- ] -= (*v--)*s1; 1047 } 1048 } 1049 1050 /* copy tmp into x according to permutation */ 1051 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 1058 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1064 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1072 1073 PetscFunctionBegin; 1074 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1075 1076 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1077 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1078 tmp = a->solve_work; 1079 1080 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1081 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1082 1083 /* copy the b into temp work space according to permutation */ 1084 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1085 1086 /* forward solve the U^T */ 1087 for (i=0; i<n; i++) { 1088 v = aa + diag[i] ; 1089 vi = aj + diag[i] + 1; 1090 nz = ai[i+1] - diag[i] - 1; 1091 tmp[i] *= *v++; 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*tmp[i]; 1094 } 1095 } 1096 1097 /* backward solve the L^T */ 1098 for (i=n-1; i>=0; i--){ 1099 v = aa + diag[i] - 1 ; 1100 vi = aj + diag[i] - 1 ; 1101 nz = diag[i] - ai[i]; 1102 while (nz--) { 1103 tmp[*vi-- ] -= (*v--)*tmp[i]; 1104 } 1105 } 1106 1107 /* copy tmp into x according to permutation */ 1108 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1109 1110 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1111 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1112 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 1115 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 /* ----------------------------------------------------------------*/ 1119 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1120 1121 #undef __FUNCT__ 1122 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1123 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1124 { 1125 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1126 IS isicol; 1127 PetscErrorCode ierr; 1128 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1129 PetscInt *bi,*cols,nnz,*cols_lvl; 1130 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1131 PetscInt i,levels,diagonal_fill; 1132 PetscTruth col_identity,row_identity; 1133 PetscReal f; 1134 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1135 PetscBT lnkbt; 1136 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1137 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1138 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1139 PetscTruth missing; 1140 1141 PetscFunctionBegin; 1142 f = info->fill; 1143 levels = (PetscInt)info->levels; 1144 diagonal_fill = (PetscInt)info->diagonal_fill; 1145 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1146 1147 /* special case that simply copies fill pattern */ 1148 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1149 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1150 if (!levels && row_identity && col_identity) { 1151 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1152 (*fact)->factor = FACTOR_LU; 1153 (*fact)->info.factor_mallocs = 0; 1154 (*fact)->info.fill_ratio_given = info->fill; 1155 (*fact)->info.fill_ratio_needed = 1.0; 1156 b = (Mat_SeqAIJ*)(*fact)->data; 1157 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1158 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1159 b->row = isrow; 1160 b->col = iscol; 1161 b->icol = isicol; 1162 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1163 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1164 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1165 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1166 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1171 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1172 1173 /* get new row pointers */ 1174 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1175 bi[0] = 0; 1176 /* bdiag is location of diagonal in factor */ 1177 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1178 bdiag[0] = 0; 1179 1180 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1181 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1182 1183 /* create a linked list for storing column indices of the active row */ 1184 nlnk = n + 1; 1185 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1186 1187 /* initial FreeSpace size is f*(ai[n]+1) */ 1188 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1189 current_space = free_space; 1190 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1191 current_space_lvl = free_space_lvl; 1192 1193 for (i=0; i<n; i++) { 1194 nzi = 0; 1195 /* copy current row into linked list */ 1196 nnz = ai[r[i]+1] - ai[r[i]]; 1197 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1198 cols = aj + ai[r[i]]; 1199 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1200 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1201 nzi += nlnk; 1202 1203 /* make sure diagonal entry is included */ 1204 if (diagonal_fill && lnk[i] == -1) { 1205 fm = n; 1206 while (lnk[fm] < i) fm = lnk[fm]; 1207 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1208 lnk[fm] = i; 1209 lnk_lvl[i] = 0; 1210 nzi++; dcount++; 1211 } 1212 1213 /* add pivot rows into the active row */ 1214 nzbd = 0; 1215 prow = lnk[n]; 1216 while (prow < i) { 1217 nnz = bdiag[prow]; 1218 cols = bj_ptr[prow] + nnz + 1; 1219 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1220 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1221 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1222 nzi += nlnk; 1223 prow = lnk[prow]; 1224 nzbd++; 1225 } 1226 bdiag[i] = nzbd; 1227 bi[i+1] = bi[i] + nzi; 1228 1229 /* if free space is not available, make more free space */ 1230 if (current_space->local_remaining<nzi) { 1231 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1232 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1233 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1234 reallocs++; 1235 } 1236 1237 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1238 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1239 bj_ptr[i] = current_space->array; 1240 bjlvl_ptr[i] = current_space_lvl->array; 1241 1242 /* make sure the active row i has diagonal entry */ 1243 if (*(bj_ptr[i]+bdiag[i]) != i) { 1244 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1245 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1246 } 1247 1248 current_space->array += nzi; 1249 current_space->local_used += nzi; 1250 current_space->local_remaining -= nzi; 1251 current_space_lvl->array += nzi; 1252 current_space_lvl->local_used += nzi; 1253 current_space_lvl->local_remaining -= nzi; 1254 } 1255 1256 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1257 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1258 1259 /* destroy list of free space and other temporary arrays */ 1260 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1261 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1262 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1263 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1264 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1265 1266 #if defined(PETSC_USE_INFO) 1267 { 1268 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1269 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1270 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1271 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1272 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1273 if (diagonal_fill) { 1274 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1275 } 1276 } 1277 #endif 1278 1279 /* put together the new matrix */ 1280 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1281 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1282 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1283 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1284 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1285 b = (Mat_SeqAIJ*)(*fact)->data; 1286 b->free_a = PETSC_TRUE; 1287 b->free_ij = PETSC_TRUE; 1288 b->singlemalloc = PETSC_FALSE; 1289 len = (bi[n] )*sizeof(PetscScalar); 1290 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1291 b->j = bj; 1292 b->i = bi; 1293 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1294 b->diag = bdiag; 1295 b->ilen = 0; 1296 b->imax = 0; 1297 b->row = isrow; 1298 b->col = iscol; 1299 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1300 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1301 b->icol = isicol; 1302 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1303 /* In b structure: Free imax, ilen, old a, old j. 1304 Allocate bdiag, solve_work, new a, new j */ 1305 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1306 b->maxnz = b->nz = bi[n] ; 1307 (*fact)->factor = FACTOR_LU; 1308 (*fact)->info.factor_mallocs = reallocs; 1309 (*fact)->info.fill_ratio_given = f; 1310 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1311 1312 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1313 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1314 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #include "src/mat/impls/sbaij/seq/sbaij.h" 1319 #undef __FUNCT__ 1320 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1321 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1322 { 1323 Mat C = *B; 1324 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1325 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1326 IS ip=b->row,iip = b->icol; 1327 PetscErrorCode ierr; 1328 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1329 PetscInt *ai=a->i,*aj=a->j; 1330 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1331 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1332 PetscReal zeropivot,rs,shiftnz; 1333 PetscReal shiftpd; 1334 ChShift_Ctx sctx; 1335 PetscInt newshift; 1336 1337 PetscFunctionBegin; 1338 1339 shiftnz = info->shiftnz; 1340 shiftpd = info->shiftpd; 1341 zeropivot = info->zeropivot; 1342 1343 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1344 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1345 1346 /* initialization */ 1347 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1348 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1349 jl = il + mbs; 1350 rtmp = (MatScalar*)(jl + mbs); 1351 1352 sctx.shift_amount = 0; 1353 sctx.nshift = 0; 1354 do { 1355 sctx.chshift = PETSC_FALSE; 1356 for (i=0; i<mbs; i++) { 1357 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1358 } 1359 1360 for (k = 0; k<mbs; k++){ 1361 bval = ba + bi[k]; 1362 /* initialize k-th row by the perm[k]-th row of A */ 1363 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1364 for (j = jmin; j < jmax; j++){ 1365 col = riip[aj[j]]; 1366 if (col >= k){ /* only take upper triangular entry */ 1367 rtmp[col] = aa[j]; 1368 *bval++ = 0.0; /* for in-place factorization */ 1369 } 1370 } 1371 /* shift the diagonal of the matrix */ 1372 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1373 1374 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1375 dk = rtmp[k]; 1376 i = jl[k]; /* first row to be added to k_th row */ 1377 1378 while (i < k){ 1379 nexti = jl[i]; /* next row to be added to k_th row */ 1380 1381 /* compute multiplier, update diag(k) and U(i,k) */ 1382 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1383 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1384 dk += uikdi*ba[ili]; 1385 ba[ili] = uikdi; /* -U(i,k) */ 1386 1387 /* add multiple of row i to k-th row */ 1388 jmin = ili + 1; jmax = bi[i+1]; 1389 if (jmin < jmax){ 1390 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1391 /* update il and jl for row i */ 1392 il[i] = jmin; 1393 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1394 } 1395 i = nexti; 1396 } 1397 1398 /* shift the diagonals when zero pivot is detected */ 1399 /* compute rs=sum of abs(off-diagonal) */ 1400 rs = 0.0; 1401 jmin = bi[k]+1; 1402 nz = bi[k+1] - jmin; 1403 bcol = bj + jmin; 1404 while (nz--){ 1405 rs += PetscAbsScalar(rtmp[*bcol]); 1406 bcol++; 1407 } 1408 1409 sctx.rs = rs; 1410 sctx.pv = dk; 1411 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1412 1413 if (newshift == 1) { 1414 if (!sctx.shift_amount) { 1415 sctx.shift_amount = 1e-5; 1416 } 1417 break; 1418 } 1419 1420 /* copy data into U(k,:) */ 1421 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1422 jmin = bi[k]+1; jmax = bi[k+1]; 1423 if (jmin < jmax) { 1424 for (j=jmin; j<jmax; j++){ 1425 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1426 } 1427 /* add the k-th row into il and jl */ 1428 il[k] = jmin; 1429 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1430 } 1431 } 1432 } while (sctx.chshift); 1433 ierr = PetscFree(il);CHKERRQ(ierr); 1434 1435 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1436 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1437 C->factor = FACTOR_CHOLESKY; 1438 C->assembled = PETSC_TRUE; 1439 C->preallocated = PETSC_TRUE; 1440 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1441 if (sctx.nshift){ 1442 if (shiftnz) { 1443 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1444 } else if (shiftpd) { 1445 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1446 } 1447 } 1448 PetscFunctionReturn(0); 1449 } 1450 1451 #undef __FUNCT__ 1452 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1453 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1454 { 1455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1456 Mat_SeqSBAIJ *b; 1457 Mat B; 1458 PetscErrorCode ierr; 1459 PetscTruth perm_identity,missing; 1460 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1461 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1462 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1463 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1464 PetscReal fill=info->fill,levels=info->levels; 1465 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1466 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1467 PetscBT lnkbt; 1468 IS iperm; 1469 1470 PetscFunctionBegin; 1471 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1472 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1473 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1474 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1475 1476 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1477 ui[0] = 0; 1478 1479 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1480 if (!levels && perm_identity) { 1481 1482 for (i=0; i<am; i++) { 1483 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1484 } 1485 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1486 cols = uj; 1487 for (i=0; i<am; i++) { 1488 aj = a->j + a->diag[i]; 1489 ncols = ui[i+1] - ui[i]; 1490 for (j=0; j<ncols; j++) *cols++ = *aj++; 1491 } 1492 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1493 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1494 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1495 1496 /* initialization */ 1497 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1498 1499 /* jl: linked list for storing indices of the pivot rows 1500 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1501 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1502 il = jl + am; 1503 uj_ptr = (PetscInt**)(il + am); 1504 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1505 for (i=0; i<am; i++){ 1506 jl[i] = am; il[i] = 0; 1507 } 1508 1509 /* create and initialize a linked list for storing column indices of the active row k */ 1510 nlnk = am + 1; 1511 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1512 1513 /* initial FreeSpace size is fill*(ai[am]+1) */ 1514 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1515 current_space = free_space; 1516 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1517 current_space_lvl = free_space_lvl; 1518 1519 for (k=0; k<am; k++){ /* for each active row k */ 1520 /* initialize lnk by the column indices of row rip[k] of A */ 1521 nzk = 0; 1522 ncols = ai[rip[k]+1] - ai[rip[k]]; 1523 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1524 ncols_upper = 0; 1525 for (j=0; j<ncols; j++){ 1526 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1527 if (riip[i] >= k){ /* only take upper triangular entry */ 1528 ajtmp[ncols_upper] = i; 1529 ncols_upper++; 1530 } 1531 } 1532 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1533 nzk += nlnk; 1534 1535 /* update lnk by computing fill-in for each pivot row to be merged in */ 1536 prow = jl[k]; /* 1st pivot row */ 1537 1538 while (prow < k){ 1539 nextprow = jl[prow]; 1540 1541 /* merge prow into k-th row */ 1542 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1543 jmax = ui[prow+1]; 1544 ncols = jmax-jmin; 1545 i = jmin - ui[prow]; 1546 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1547 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1548 j = *(uj - 1); 1549 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1550 nzk += nlnk; 1551 1552 /* update il and jl for prow */ 1553 if (jmin < jmax){ 1554 il[prow] = jmin; 1555 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1556 } 1557 prow = nextprow; 1558 } 1559 1560 /* if free space is not available, make more free space */ 1561 if (current_space->local_remaining<nzk) { 1562 i = am - k + 1; /* num of unfactored rows */ 1563 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1564 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1565 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1566 reallocs++; 1567 } 1568 1569 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1570 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1571 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1572 1573 /* add the k-th row into il and jl */ 1574 if (nzk > 1){ 1575 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1576 jl[k] = jl[i]; jl[i] = k; 1577 il[k] = ui[k] + 1; 1578 } 1579 uj_ptr[k] = current_space->array; 1580 uj_lvl_ptr[k] = current_space_lvl->array; 1581 1582 current_space->array += nzk; 1583 current_space->local_used += nzk; 1584 current_space->local_remaining -= nzk; 1585 1586 current_space_lvl->array += nzk; 1587 current_space_lvl->local_used += nzk; 1588 current_space_lvl->local_remaining -= nzk; 1589 1590 ui[k+1] = ui[k] + nzk; 1591 } 1592 1593 #if defined(PETSC_USE_INFO) 1594 if (ai[am] != 0) { 1595 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1596 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1597 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1598 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1599 } else { 1600 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1601 } 1602 #endif 1603 1604 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1605 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1606 ierr = PetscFree(jl);CHKERRQ(ierr); 1607 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1608 1609 /* destroy list of free space and other temporary array(s) */ 1610 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1611 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1612 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1613 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1614 1615 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1616 1617 /* put together the new matrix in MATSEQSBAIJ format */ 1618 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1619 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1620 B = *fact; 1621 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1622 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1623 1624 b = (Mat_SeqSBAIJ*)B->data; 1625 b->singlemalloc = PETSC_FALSE; 1626 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1627 b->j = uj; 1628 b->i = ui; 1629 b->diag = 0; 1630 b->ilen = 0; 1631 b->imax = 0; 1632 b->row = perm; 1633 b->col = perm; 1634 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1635 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1636 b->icol = iperm; 1637 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1638 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1639 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1640 b->maxnz = b->nz = ui[am]; 1641 b->free_a = PETSC_TRUE; 1642 b->free_ij = PETSC_TRUE; 1643 1644 B->factor = FACTOR_CHOLESKY; 1645 B->info.factor_mallocs = reallocs; 1646 B->info.fill_ratio_given = fill; 1647 if (ai[am] != 0) { 1648 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1649 } else { 1650 B->info.fill_ratio_needed = 0.0; 1651 } 1652 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1653 if (perm_identity){ 1654 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1655 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1656 } 1657 PetscFunctionReturn(0); 1658 } 1659 1660 #undef __FUNCT__ 1661 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1662 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1663 { 1664 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1665 Mat_SeqSBAIJ *b; 1666 Mat B; 1667 PetscErrorCode ierr; 1668 PetscTruth perm_identity; 1669 PetscReal fill = info->fill; 1670 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1671 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1672 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1673 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1674 PetscBT lnkbt; 1675 IS iperm; 1676 1677 PetscFunctionBegin; 1678 /* check whether perm is the identity mapping */ 1679 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1680 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1681 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1682 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1683 1684 /* initialization */ 1685 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1686 ui[0] = 0; 1687 1688 /* jl: linked list for storing indices of the pivot rows 1689 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1690 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1691 il = jl + am; 1692 cols = il + am; 1693 ui_ptr = (PetscInt**)(cols + am); 1694 for (i=0; i<am; i++){ 1695 jl[i] = am; il[i] = 0; 1696 } 1697 1698 /* create and initialize a linked list for storing column indices of the active row k */ 1699 nlnk = am + 1; 1700 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1701 1702 /* initial FreeSpace size is fill*(ai[am]+1) */ 1703 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1704 current_space = free_space; 1705 1706 for (k=0; k<am; k++){ /* for each active row k */ 1707 /* initialize lnk by the column indices of row rip[k] of A */ 1708 nzk = 0; 1709 ncols = ai[rip[k]+1] - ai[rip[k]]; 1710 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1711 ncols_upper = 0; 1712 for (j=0; j<ncols; j++){ 1713 i = riip[*(aj + ai[rip[k]] + j)]; 1714 if (i >= k){ /* only take upper triangular entry */ 1715 cols[ncols_upper] = i; 1716 ncols_upper++; 1717 } 1718 } 1719 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1720 nzk += nlnk; 1721 1722 /* update lnk by computing fill-in for each pivot row to be merged in */ 1723 prow = jl[k]; /* 1st pivot row */ 1724 1725 while (prow < k){ 1726 nextprow = jl[prow]; 1727 /* merge prow into k-th row */ 1728 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1729 jmax = ui[prow+1]; 1730 ncols = jmax-jmin; 1731 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1732 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1733 nzk += nlnk; 1734 1735 /* update il and jl for prow */ 1736 if (jmin < jmax){ 1737 il[prow] = jmin; 1738 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1739 } 1740 prow = nextprow; 1741 } 1742 1743 /* if free space is not available, make more free space */ 1744 if (current_space->local_remaining<nzk) { 1745 i = am - k + 1; /* num of unfactored rows */ 1746 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1747 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1748 reallocs++; 1749 } 1750 1751 /* copy data into free space, then initialize lnk */ 1752 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1753 1754 /* add the k-th row into il and jl */ 1755 if (nzk-1 > 0){ 1756 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1757 jl[k] = jl[i]; jl[i] = k; 1758 il[k] = ui[k] + 1; 1759 } 1760 ui_ptr[k] = current_space->array; 1761 current_space->array += nzk; 1762 current_space->local_used += nzk; 1763 current_space->local_remaining -= nzk; 1764 1765 ui[k+1] = ui[k] + nzk; 1766 } 1767 1768 #if defined(PETSC_USE_INFO) 1769 if (ai[am] != 0) { 1770 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1771 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1772 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1773 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1774 } else { 1775 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1776 } 1777 #endif 1778 1779 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1780 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1781 ierr = PetscFree(jl);CHKERRQ(ierr); 1782 1783 /* destroy list of free space and other temporary array(s) */ 1784 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1785 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1786 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1787 1788 /* put together the new matrix in MATSEQSBAIJ format */ 1789 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1790 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1791 B = *fact; 1792 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1793 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1794 1795 b = (Mat_SeqSBAIJ*)B->data; 1796 b->singlemalloc = PETSC_FALSE; 1797 b->free_a = PETSC_TRUE; 1798 b->free_ij = PETSC_TRUE; 1799 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1800 b->j = uj; 1801 b->i = ui; 1802 b->diag = 0; 1803 b->ilen = 0; 1804 b->imax = 0; 1805 b->row = perm; 1806 b->col = perm; 1807 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1808 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1809 b->icol = iperm; 1810 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1811 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1812 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1813 b->maxnz = b->nz = ui[am]; 1814 1815 B->factor = FACTOR_CHOLESKY; 1816 B->info.factor_mallocs = reallocs; 1817 B->info.fill_ratio_given = fill; 1818 if (ai[am] != 0) { 1819 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1820 } else { 1821 B->info.fill_ratio_needed = 0.0; 1822 } 1823 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1824 if (perm_identity){ 1825 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1826 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1827 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1828 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1829 } else { 1830 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1831 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1832 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1833 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837