1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscScalar d; 443 PetscReal rs; 444 LUShift_Ctx sctx; 445 PetscInt newshift,*ddiag; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp; ics = ic; 453 454 sctx.shift_top = 0; 455 sctx.nshift_max = 0; 456 sctx.shift_lo = 0; 457 sctx.shift_hi = 0; 458 459 /* if both shift schemes are chosen by user, only use info->shiftpd */ 460 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 461 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 462 PetscInt *aai = a->i; 463 ddiag = a->diag; 464 sctx.shift_top = 0; 465 for (i=0; i<n; i++) { 466 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 467 d = (a->a)[ddiag[i]]; 468 rs = -PetscAbsScalar(d) - PetscRealPart(d); 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 rs += PetscAbsScalar(v[j]); 473 if (rs>sctx.shift_top) sctx.shift_top = rs; 474 } 475 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 sctx.shift_amount = 0; 483 sctx.nshift = 0; 484 do { 485 sctx.lushift = PETSC_FALSE; 486 for (i=0; i<n; i++){ 487 nz = bi[i+1] - bi[i]; 488 bjtmp = bj + bi[i]; 489 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = a->i[r[i]+1] - a->i[r[i]]; 493 ajtmp = a->j + a->i[r[i]]; 494 v = a->a + a->i[r[i]]; 495 for (j=0; j<nz; j++) { 496 rtmp[ics[ajtmp[j]]] = v[j]; 497 } 498 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 row = *bjtmp++; 501 while (row < i) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row]; 505 pj = b->j + diag_offset[row] + 1; 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = bi[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 511 } 512 row = *bjtmp++; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + bi[i] ; 516 pj = b->j + bi[i] ; 517 nz = bi[i+1] - bi[i]; 518 diag = diag_offset[i] - bi[i]; 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 526 sctx.rs = rs; 527 sctx.pv = pv[diag]; 528 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 529 if (newshift == 1) break; 530 } 531 532 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 557 if (sctx.nshift){ 558 if (info->shiftnz) { 559 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 560 } else if (info->shiftpd) { 561 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 This routine implements inplace ILU(0) with row or/and column permutations. 569 Input: 570 A - original matrix 571 Output; 572 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 573 a->j (col index) is permuted by the inverse of colperm, then sorted 574 a->a reordered accordingly with a->j 575 a->diag (ptr to diagonal elements) is updated. 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 579 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 580 { 581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 582 IS isrow = a->row,isicol = a->icol; 583 PetscErrorCode ierr; 584 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 585 PetscInt *ajtmp,nz,row,*ics; 586 PetscInt *diag = a->diag,nbdiag,*pj; 587 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 588 PetscReal rs; 589 LUShift_Ctx sctx; 590 PetscInt newshift; 591 592 PetscFunctionBegin; 593 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 594 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 595 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 597 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 598 ics = ic; 599 600 sctx.shift_top = 0; 601 sctx.nshift_max = 0; 602 sctx.shift_lo = 0; 603 sctx.shift_hi = 0; 604 605 /* if both shift schemes are chosen by user, only use info->shiftpd */ 606 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 607 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 608 sctx.shift_top = 0; 609 for (i=0; i<n; i++) { 610 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 611 d = (a->a)[diag[i]]; 612 rs = -PetscAbsScalar(d) - PetscRealPart(d); 613 v = a->a+ai[i]; 614 nz = ai[i+1] - ai[i]; 615 for (j=0; j<nz; j++) 616 rs += PetscAbsScalar(v[j]); 617 if (rs>sctx.shift_top) sctx.shift_top = rs; 618 } 619 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 620 sctx.shift_top *= 1.1; 621 sctx.nshift_max = 5; 622 sctx.shift_lo = 0.; 623 sctx.shift_hi = 1.; 624 } 625 626 sctx.shift_amount = 0; 627 sctx.nshift = 0; 628 do { 629 sctx.lushift = PETSC_FALSE; 630 for (i=0; i<n; i++){ 631 /* load in initial unfactored row */ 632 nz = ai[r[i]+1] - ai[r[i]]; 633 ajtmp = aj + ai[r[i]]; 634 v = a->a + ai[r[i]]; 635 /* sort permuted ajtmp and values v accordingly */ 636 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 637 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 638 639 diag[r[i]] = ai[r[i]]; 640 for (j=0; j<nz; j++) { 641 rtmp[ajtmp[j]] = v[j]; 642 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 643 } 644 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 645 646 row = *ajtmp++; 647 while (row < i) { 648 pc = rtmp + row; 649 if (*pc != 0.0) { 650 pv = a->a + diag[r[row]]; 651 pj = aj + diag[r[row]] + 1; 652 653 multiplier = *pc / *pv++; 654 *pc = multiplier; 655 nz = ai[r[row]+1] - diag[r[row]] - 1; 656 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 657 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 658 } 659 row = *ajtmp++; 660 } 661 /* finished row so overwrite it onto a->a */ 662 pv = a->a + ai[r[i]] ; 663 pj = aj + ai[r[i]] ; 664 nz = ai[r[i]+1] - ai[r[i]]; 665 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 666 667 rs = 0.0; 668 for (j=0; j<nz; j++) { 669 pv[j] = rtmp[pj[j]]; 670 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 671 } 672 673 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 674 sctx.rs = rs; 675 sctx.pv = pv[nbdiag]; 676 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 677 if (newshift == 1) break; 678 } 679 680 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 681 /* 682 * if no shift in this attempt & shifting & started shifting & can refine, 683 * then try lower shift 684 */ 685 sctx.shift_hi = info->shift_fraction; 686 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 687 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 688 sctx.lushift = PETSC_TRUE; 689 sctx.nshift++; 690 } 691 } while (sctx.lushift); 692 693 /* invert diagonal entries for simplier triangular solves */ 694 for (i=0; i<n; i++) { 695 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 696 } 697 698 ierr = PetscFree(rtmp);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 701 A->factor = FACTOR_LU; 702 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 703 A->assembled = PETSC_TRUE; 704 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 705 if (sctx.nshift){ 706 if (info->shiftnz) { 707 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 708 } else if (info->shiftpd) { 709 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 710 } 711 } 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 717 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 718 { 719 PetscFunctionBegin; 720 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 721 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 726 /* ----------------------------------------------------------- */ 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatLUFactor_SeqAIJ" 729 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 730 { 731 PetscErrorCode ierr; 732 Mat C; 733 734 PetscFunctionBegin; 735 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 736 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 737 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 /* ----------------------------------------------------------- */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatSolve_SeqAIJ" 744 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 745 { 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 747 IS iscol = a->col,isrow = a->row; 748 PetscErrorCode ierr; 749 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 750 PetscInt nz,*rout,*cout; 751 PetscScalar *x,*tmp,*tmps,*aa = a->a,sum,*v; 752 const PetscScalar *b; 753 754 PetscFunctionBegin; 755 if (!n) PetscFunctionReturn(0); 756 757 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 758 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 759 tmp = a->solve_work; 760 761 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 762 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 763 764 /* forward solve the lower triangular */ 765 tmp[0] = b[*r++]; 766 tmps = tmp; 767 for (i=1; i<n; i++) { 768 v = aa + ai[i] ; 769 vi = aj + ai[i] ; 770 nz = a->diag[i] - ai[i]; 771 sum = b[*r++]; 772 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 773 tmp[i] = sum; 774 } 775 776 /* backward solve the upper triangular */ 777 for (i=n-1; i>=0; i--){ 778 v = aa + a->diag[i] + 1; 779 vi = aj + a->diag[i] + 1; 780 nz = ai[i+1] - a->diag[i] - 1; 781 sum = tmp[i]; 782 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 783 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 784 } 785 786 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 787 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 788 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 789 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 790 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 791 PetscFunctionReturn(0); 792 } 793 794 #undef __FUNCT__ 795 #define __FUNCT__ "MatMatSolve_SeqAIJ" 796 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 797 { 798 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 799 IS iscol = a->col,isrow = a->row; 800 PetscErrorCode ierr; 801 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 802 PetscInt nz,*rout,*cout,neq; 803 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 804 805 PetscFunctionBegin; 806 if (!n) PetscFunctionReturn(0); 807 808 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 809 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 810 811 tmp = a->solve_work; 812 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 813 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 814 815 for (neq=0; neq<n; neq++){ 816 /* forward solve the lower triangular */ 817 tmp[0] = b[r[0]]; 818 tmps = tmp; 819 for (i=1; i<n; i++) { 820 v = aa + ai[i] ; 821 vi = aj + ai[i] ; 822 nz = a->diag[i] - ai[i]; 823 sum = b[r[i]]; 824 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 825 tmp[i] = sum; 826 } 827 /* backward solve the upper triangular */ 828 for (i=n-1; i>=0; i--){ 829 v = aa + a->diag[i] + 1; 830 vi = aj + a->diag[i] + 1; 831 nz = ai[i+1] - a->diag[i] - 1; 832 sum = tmp[i]; 833 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 834 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 835 } 836 837 b += n; 838 x += n; 839 } 840 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 841 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 842 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 843 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 844 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 845 PetscFunctionReturn(0); 846 } 847 848 #undef __FUNCT__ 849 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 850 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 851 { 852 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 853 IS iscol = a->col,isrow = a->row; 854 PetscErrorCode ierr; 855 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 856 PetscInt nz,*rout,*cout,row; 857 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 858 859 PetscFunctionBegin; 860 if (!n) PetscFunctionReturn(0); 861 862 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 863 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 864 tmp = a->solve_work; 865 866 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 867 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 868 869 /* forward solve the lower triangular */ 870 tmp[0] = b[*r++]; 871 tmps = tmp; 872 for (row=1; row<n; row++) { 873 i = rout[row]; /* permuted row */ 874 v = aa + ai[i] ; 875 vi = aj + ai[i] ; 876 nz = a->diag[i] - ai[i]; 877 sum = b[*r++]; 878 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 879 tmp[row] = sum; 880 } 881 882 /* backward solve the upper triangular */ 883 for (row=n-1; row>=0; row--){ 884 i = rout[row]; /* permuted row */ 885 v = aa + a->diag[i] + 1; 886 vi = aj + a->diag[i] + 1; 887 nz = ai[i+1] - a->diag[i] - 1; 888 sum = tmp[row]; 889 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 890 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 891 } 892 893 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 894 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 895 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 896 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 897 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 898 PetscFunctionReturn(0); 899 } 900 901 /* ----------------------------------------------------------- */ 902 #undef __FUNCT__ 903 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 904 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 905 { 906 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 907 PetscErrorCode ierr; 908 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 909 PetscScalar *x,*b,*aa = a->a; 910 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 911 PetscInt adiag_i,i,*vi,nz,ai_i; 912 PetscScalar *v,sum; 913 #endif 914 915 PetscFunctionBegin; 916 if (!n) PetscFunctionReturn(0); 917 918 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 919 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 920 921 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 922 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 923 #else 924 /* forward solve the lower triangular */ 925 x[0] = b[0]; 926 for (i=1; i<n; i++) { 927 ai_i = ai[i]; 928 v = aa + ai_i; 929 vi = aj + ai_i; 930 nz = adiag[i] - ai_i; 931 sum = b[i]; 932 while (nz--) sum -= *v++ * x[*vi++]; 933 x[i] = sum; 934 } 935 936 /* backward solve the upper triangular */ 937 for (i=n-1; i>=0; i--){ 938 adiag_i = adiag[i]; 939 v = aa + adiag_i + 1; 940 vi = aj + adiag_i + 1; 941 nz = ai[i+1] - adiag_i - 1; 942 sum = x[i]; 943 while (nz--) sum -= *v++ * x[*vi++]; 944 x[i] = sum*aa[adiag_i]; 945 } 946 #endif 947 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 948 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 949 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 950 PetscFunctionReturn(0); 951 } 952 953 #undef __FUNCT__ 954 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 955 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 956 { 957 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 958 IS iscol = a->col,isrow = a->row; 959 PetscErrorCode ierr; 960 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 961 PetscInt nz,*rout,*cout; 962 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 963 964 PetscFunctionBegin; 965 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 966 967 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 968 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 969 tmp = a->solve_work; 970 971 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 972 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 973 974 /* forward solve the lower triangular */ 975 tmp[0] = b[*r++]; 976 for (i=1; i<n; i++) { 977 v = aa + ai[i] ; 978 vi = aj + ai[i] ; 979 nz = a->diag[i] - ai[i]; 980 sum = b[*r++]; 981 while (nz--) sum -= *v++ * tmp[*vi++ ]; 982 tmp[i] = sum; 983 } 984 985 /* backward solve the upper triangular */ 986 for (i=n-1; i>=0; i--){ 987 v = aa + a->diag[i] + 1; 988 vi = aj + a->diag[i] + 1; 989 nz = ai[i+1] - a->diag[i] - 1; 990 sum = tmp[i]; 991 while (nz--) sum -= *v++ * tmp[*vi++ ]; 992 tmp[i] = sum*aa[a->diag[i]]; 993 x[*c--] += tmp[i]; 994 } 995 996 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 997 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 998 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 999 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1000 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1001 1002 PetscFunctionReturn(0); 1003 } 1004 /* -------------------------------------------------------------------*/ 1005 #undef __FUNCT__ 1006 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1007 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1008 { 1009 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1010 IS iscol = a->col,isrow = a->row; 1011 PetscErrorCode ierr; 1012 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1013 PetscInt nz,*rout,*cout,*diag = a->diag; 1014 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1015 1016 PetscFunctionBegin; 1017 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1018 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1019 tmp = a->solve_work; 1020 1021 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1022 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1023 1024 /* copy the b into temp work space according to permutation */ 1025 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1026 1027 /* forward solve the U^T */ 1028 for (i=0; i<n; i++) { 1029 v = aa + diag[i] ; 1030 vi = aj + diag[i] + 1; 1031 nz = ai[i+1] - diag[i] - 1; 1032 s1 = tmp[i]; 1033 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1034 while (nz--) { 1035 tmp[*vi++ ] -= (*v++)*s1; 1036 } 1037 tmp[i] = s1; 1038 } 1039 1040 /* backward solve the L^T */ 1041 for (i=n-1; i>=0; i--){ 1042 v = aa + diag[i] - 1 ; 1043 vi = aj + diag[i] - 1 ; 1044 nz = diag[i] - ai[i]; 1045 s1 = tmp[i]; 1046 while (nz--) { 1047 tmp[*vi-- ] -= (*v--)*s1; 1048 } 1049 } 1050 1051 /* copy tmp into x according to permutation */ 1052 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1053 1054 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1055 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1057 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1058 1059 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1060 PetscFunctionReturn(0); 1061 } 1062 1063 #undef __FUNCT__ 1064 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1065 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1066 { 1067 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1068 IS iscol = a->col,isrow = a->row; 1069 PetscErrorCode ierr; 1070 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1071 PetscInt nz,*rout,*cout,*diag = a->diag; 1072 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1073 1074 PetscFunctionBegin; 1075 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1076 1077 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1078 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1079 tmp = a->solve_work; 1080 1081 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1082 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1083 1084 /* copy the b into temp work space according to permutation */ 1085 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1086 1087 /* forward solve the U^T */ 1088 for (i=0; i<n; i++) { 1089 v = aa + diag[i] ; 1090 vi = aj + diag[i] + 1; 1091 nz = ai[i+1] - diag[i] - 1; 1092 tmp[i] *= *v++; 1093 while (nz--) { 1094 tmp[*vi++ ] -= (*v++)*tmp[i]; 1095 } 1096 } 1097 1098 /* backward solve the L^T */ 1099 for (i=n-1; i>=0; i--){ 1100 v = aa + diag[i] - 1 ; 1101 vi = aj + diag[i] - 1 ; 1102 nz = diag[i] - ai[i]; 1103 while (nz--) { 1104 tmp[*vi-- ] -= (*v--)*tmp[i]; 1105 } 1106 } 1107 1108 /* copy tmp into x according to permutation */ 1109 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1110 1111 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1112 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1115 1116 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 /* ----------------------------------------------------------------*/ 1120 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1121 1122 #undef __FUNCT__ 1123 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1124 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1125 { 1126 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1127 IS isicol; 1128 PetscErrorCode ierr; 1129 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1130 PetscInt *bi,*cols,nnz,*cols_lvl; 1131 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1132 PetscInt i,levels,diagonal_fill; 1133 PetscTruth col_identity,row_identity; 1134 PetscReal f; 1135 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1136 PetscBT lnkbt; 1137 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1138 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1139 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1140 PetscTruth missing; 1141 1142 PetscFunctionBegin; 1143 f = info->fill; 1144 levels = (PetscInt)info->levels; 1145 diagonal_fill = (PetscInt)info->diagonal_fill; 1146 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1147 1148 /* special case that simply copies fill pattern */ 1149 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1150 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1151 if (!levels && row_identity && col_identity) { 1152 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1153 (*fact)->factor = FACTOR_LU; 1154 (*fact)->info.factor_mallocs = 0; 1155 (*fact)->info.fill_ratio_given = info->fill; 1156 (*fact)->info.fill_ratio_needed = 1.0; 1157 b = (Mat_SeqAIJ*)(*fact)->data; 1158 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1159 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1160 b->row = isrow; 1161 b->col = iscol; 1162 b->icol = isicol; 1163 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1164 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1165 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1166 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1167 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1168 PetscFunctionReturn(0); 1169 } 1170 1171 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1172 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1173 1174 /* get new row pointers */ 1175 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1176 bi[0] = 0; 1177 /* bdiag is location of diagonal in factor */ 1178 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1179 bdiag[0] = 0; 1180 1181 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1182 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1183 1184 /* create a linked list for storing column indices of the active row */ 1185 nlnk = n + 1; 1186 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1187 1188 /* initial FreeSpace size is f*(ai[n]+1) */ 1189 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1190 current_space = free_space; 1191 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1192 current_space_lvl = free_space_lvl; 1193 1194 for (i=0; i<n; i++) { 1195 nzi = 0; 1196 /* copy current row into linked list */ 1197 nnz = ai[r[i]+1] - ai[r[i]]; 1198 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1199 cols = aj + ai[r[i]]; 1200 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1201 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1202 nzi += nlnk; 1203 1204 /* make sure diagonal entry is included */ 1205 if (diagonal_fill && lnk[i] == -1) { 1206 fm = n; 1207 while (lnk[fm] < i) fm = lnk[fm]; 1208 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1209 lnk[fm] = i; 1210 lnk_lvl[i] = 0; 1211 nzi++; dcount++; 1212 } 1213 1214 /* add pivot rows into the active row */ 1215 nzbd = 0; 1216 prow = lnk[n]; 1217 while (prow < i) { 1218 nnz = bdiag[prow]; 1219 cols = bj_ptr[prow] + nnz + 1; 1220 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1221 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1222 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1223 nzi += nlnk; 1224 prow = lnk[prow]; 1225 nzbd++; 1226 } 1227 bdiag[i] = nzbd; 1228 bi[i+1] = bi[i] + nzi; 1229 1230 /* if free space is not available, make more free space */ 1231 if (current_space->local_remaining<nzi) { 1232 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1233 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1234 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1235 reallocs++; 1236 } 1237 1238 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1239 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1240 bj_ptr[i] = current_space->array; 1241 bjlvl_ptr[i] = current_space_lvl->array; 1242 1243 /* make sure the active row i has diagonal entry */ 1244 if (*(bj_ptr[i]+bdiag[i]) != i) { 1245 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1246 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1247 } 1248 1249 current_space->array += nzi; 1250 current_space->local_used += nzi; 1251 current_space->local_remaining -= nzi; 1252 current_space_lvl->array += nzi; 1253 current_space_lvl->local_used += nzi; 1254 current_space_lvl->local_remaining -= nzi; 1255 } 1256 1257 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1258 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1259 1260 /* destroy list of free space and other temporary arrays */ 1261 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1262 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1263 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1264 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1265 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1266 1267 #if defined(PETSC_USE_INFO) 1268 { 1269 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1270 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1271 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1272 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1273 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1274 if (diagonal_fill) { 1275 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1276 } 1277 } 1278 #endif 1279 1280 /* put together the new matrix */ 1281 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1282 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1283 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1284 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1285 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1286 b = (Mat_SeqAIJ*)(*fact)->data; 1287 b->free_a = PETSC_TRUE; 1288 b->free_ij = PETSC_TRUE; 1289 b->singlemalloc = PETSC_FALSE; 1290 len = (bi[n] )*sizeof(PetscScalar); 1291 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1292 b->j = bj; 1293 b->i = bi; 1294 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1295 b->diag = bdiag; 1296 b->ilen = 0; 1297 b->imax = 0; 1298 b->row = isrow; 1299 b->col = iscol; 1300 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1301 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1302 b->icol = isicol; 1303 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1304 /* In b structure: Free imax, ilen, old a, old j. 1305 Allocate bdiag, solve_work, new a, new j */ 1306 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1307 b->maxnz = b->nz = bi[n] ; 1308 (*fact)->factor = FACTOR_LU; 1309 (*fact)->info.factor_mallocs = reallocs; 1310 (*fact)->info.fill_ratio_given = f; 1311 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1312 1313 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1314 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1315 1316 PetscFunctionReturn(0); 1317 } 1318 1319 #include "src/mat/impls/sbaij/seq/sbaij.h" 1320 #undef __FUNCT__ 1321 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1322 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1323 { 1324 Mat C = *B; 1325 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1326 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1327 IS ip=b->row,iip = b->icol; 1328 PetscErrorCode ierr; 1329 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1330 PetscInt *ai=a->i,*aj=a->j; 1331 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1332 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1333 PetscReal zeropivot,rs,shiftnz; 1334 PetscReal shiftpd; 1335 ChShift_Ctx sctx; 1336 PetscInt newshift; 1337 1338 PetscFunctionBegin; 1339 1340 shiftnz = info->shiftnz; 1341 shiftpd = info->shiftpd; 1342 zeropivot = info->zeropivot; 1343 1344 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1345 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1346 1347 /* initialization */ 1348 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1349 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1350 jl = il + mbs; 1351 rtmp = (MatScalar*)(jl + mbs); 1352 1353 sctx.shift_amount = 0; 1354 sctx.nshift = 0; 1355 do { 1356 sctx.chshift = PETSC_FALSE; 1357 for (i=0; i<mbs; i++) { 1358 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1359 } 1360 1361 for (k = 0; k<mbs; k++){ 1362 bval = ba + bi[k]; 1363 /* initialize k-th row by the perm[k]-th row of A */ 1364 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1365 for (j = jmin; j < jmax; j++){ 1366 col = riip[aj[j]]; 1367 if (col >= k){ /* only take upper triangular entry */ 1368 rtmp[col] = aa[j]; 1369 *bval++ = 0.0; /* for in-place factorization */ 1370 } 1371 } 1372 /* shift the diagonal of the matrix */ 1373 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1374 1375 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1376 dk = rtmp[k]; 1377 i = jl[k]; /* first row to be added to k_th row */ 1378 1379 while (i < k){ 1380 nexti = jl[i]; /* next row to be added to k_th row */ 1381 1382 /* compute multiplier, update diag(k) and U(i,k) */ 1383 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1384 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1385 dk += uikdi*ba[ili]; 1386 ba[ili] = uikdi; /* -U(i,k) */ 1387 1388 /* add multiple of row i to k-th row */ 1389 jmin = ili + 1; jmax = bi[i+1]; 1390 if (jmin < jmax){ 1391 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1392 /* update il and jl for row i */ 1393 il[i] = jmin; 1394 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1395 } 1396 i = nexti; 1397 } 1398 1399 /* shift the diagonals when zero pivot is detected */ 1400 /* compute rs=sum of abs(off-diagonal) */ 1401 rs = 0.0; 1402 jmin = bi[k]+1; 1403 nz = bi[k+1] - jmin; 1404 bcol = bj + jmin; 1405 while (nz--){ 1406 rs += PetscAbsScalar(rtmp[*bcol]); 1407 bcol++; 1408 } 1409 1410 sctx.rs = rs; 1411 sctx.pv = dk; 1412 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1413 1414 if (newshift == 1) { 1415 if (!sctx.shift_amount) { 1416 sctx.shift_amount = 1e-5; 1417 } 1418 break; 1419 } 1420 1421 /* copy data into U(k,:) */ 1422 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1423 jmin = bi[k]+1; jmax = bi[k+1]; 1424 if (jmin < jmax) { 1425 for (j=jmin; j<jmax; j++){ 1426 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1427 } 1428 /* add the k-th row into il and jl */ 1429 il[k] = jmin; 1430 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1431 } 1432 } 1433 } while (sctx.chshift); 1434 ierr = PetscFree(il);CHKERRQ(ierr); 1435 1436 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1437 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1438 C->factor = FACTOR_CHOLESKY; 1439 C->assembled = PETSC_TRUE; 1440 C->preallocated = PETSC_TRUE; 1441 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1442 if (sctx.nshift){ 1443 if (shiftnz) { 1444 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1445 } else if (shiftpd) { 1446 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1447 } 1448 } 1449 PetscFunctionReturn(0); 1450 } 1451 1452 #undef __FUNCT__ 1453 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1454 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1455 { 1456 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1457 Mat_SeqSBAIJ *b; 1458 Mat B; 1459 PetscErrorCode ierr; 1460 PetscTruth perm_identity,missing; 1461 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1462 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1463 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1464 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1465 PetscReal fill=info->fill,levels=info->levels; 1466 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1467 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1468 PetscBT lnkbt; 1469 IS iperm; 1470 1471 PetscFunctionBegin; 1472 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1473 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1474 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1475 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1476 1477 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1478 ui[0] = 0; 1479 1480 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1481 if (!levels && perm_identity) { 1482 1483 for (i=0; i<am; i++) { 1484 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1485 } 1486 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1487 cols = uj; 1488 for (i=0; i<am; i++) { 1489 aj = a->j + a->diag[i]; 1490 ncols = ui[i+1] - ui[i]; 1491 for (j=0; j<ncols; j++) *cols++ = *aj++; 1492 } 1493 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1494 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1495 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1496 1497 /* initialization */ 1498 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1499 1500 /* jl: linked list for storing indices of the pivot rows 1501 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1502 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1503 il = jl + am; 1504 uj_ptr = (PetscInt**)(il + am); 1505 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1506 for (i=0; i<am; i++){ 1507 jl[i] = am; il[i] = 0; 1508 } 1509 1510 /* create and initialize a linked list for storing column indices of the active row k */ 1511 nlnk = am + 1; 1512 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1513 1514 /* initial FreeSpace size is fill*(ai[am]+1) */ 1515 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1516 current_space = free_space; 1517 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1518 current_space_lvl = free_space_lvl; 1519 1520 for (k=0; k<am; k++){ /* for each active row k */ 1521 /* initialize lnk by the column indices of row rip[k] of A */ 1522 nzk = 0; 1523 ncols = ai[rip[k]+1] - ai[rip[k]]; 1524 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1525 ncols_upper = 0; 1526 for (j=0; j<ncols; j++){ 1527 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1528 if (riip[i] >= k){ /* only take upper triangular entry */ 1529 ajtmp[ncols_upper] = i; 1530 ncols_upper++; 1531 } 1532 } 1533 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1534 nzk += nlnk; 1535 1536 /* update lnk by computing fill-in for each pivot row to be merged in */ 1537 prow = jl[k]; /* 1st pivot row */ 1538 1539 while (prow < k){ 1540 nextprow = jl[prow]; 1541 1542 /* merge prow into k-th row */ 1543 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1544 jmax = ui[prow+1]; 1545 ncols = jmax-jmin; 1546 i = jmin - ui[prow]; 1547 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1548 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1549 j = *(uj - 1); 1550 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1551 nzk += nlnk; 1552 1553 /* update il and jl for prow */ 1554 if (jmin < jmax){ 1555 il[prow] = jmin; 1556 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1557 } 1558 prow = nextprow; 1559 } 1560 1561 /* if free space is not available, make more free space */ 1562 if (current_space->local_remaining<nzk) { 1563 i = am - k + 1; /* num of unfactored rows */ 1564 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1565 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1566 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1567 reallocs++; 1568 } 1569 1570 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1571 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1572 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1573 1574 /* add the k-th row into il and jl */ 1575 if (nzk > 1){ 1576 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1577 jl[k] = jl[i]; jl[i] = k; 1578 il[k] = ui[k] + 1; 1579 } 1580 uj_ptr[k] = current_space->array; 1581 uj_lvl_ptr[k] = current_space_lvl->array; 1582 1583 current_space->array += nzk; 1584 current_space->local_used += nzk; 1585 current_space->local_remaining -= nzk; 1586 1587 current_space_lvl->array += nzk; 1588 current_space_lvl->local_used += nzk; 1589 current_space_lvl->local_remaining -= nzk; 1590 1591 ui[k+1] = ui[k] + nzk; 1592 } 1593 1594 #if defined(PETSC_USE_INFO) 1595 if (ai[am] != 0) { 1596 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1597 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1598 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1599 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1600 } else { 1601 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1602 } 1603 #endif 1604 1605 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1606 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1607 ierr = PetscFree(jl);CHKERRQ(ierr); 1608 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1609 1610 /* destroy list of free space and other temporary array(s) */ 1611 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1612 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1613 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1614 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1615 1616 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1617 1618 /* put together the new matrix in MATSEQSBAIJ format */ 1619 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1620 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1621 B = *fact; 1622 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1623 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1624 1625 b = (Mat_SeqSBAIJ*)B->data; 1626 b->singlemalloc = PETSC_FALSE; 1627 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1628 b->j = uj; 1629 b->i = ui; 1630 b->diag = 0; 1631 b->ilen = 0; 1632 b->imax = 0; 1633 b->row = perm; 1634 b->col = perm; 1635 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1636 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1637 b->icol = iperm; 1638 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1639 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1640 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1641 b->maxnz = b->nz = ui[am]; 1642 b->free_a = PETSC_TRUE; 1643 b->free_ij = PETSC_TRUE; 1644 1645 B->factor = FACTOR_CHOLESKY; 1646 B->info.factor_mallocs = reallocs; 1647 B->info.fill_ratio_given = fill; 1648 if (ai[am] != 0) { 1649 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1650 } else { 1651 B->info.fill_ratio_needed = 0.0; 1652 } 1653 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1654 if (perm_identity){ 1655 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1656 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1657 } 1658 PetscFunctionReturn(0); 1659 } 1660 1661 #undef __FUNCT__ 1662 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1663 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1664 { 1665 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1666 Mat_SeqSBAIJ *b; 1667 Mat B; 1668 PetscErrorCode ierr; 1669 PetscTruth perm_identity; 1670 PetscReal fill = info->fill; 1671 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1672 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1673 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1674 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1675 PetscBT lnkbt; 1676 IS iperm; 1677 1678 PetscFunctionBegin; 1679 /* check whether perm is the identity mapping */ 1680 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1681 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1682 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1683 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1684 1685 /* initialization */ 1686 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1687 ui[0] = 0; 1688 1689 /* jl: linked list for storing indices of the pivot rows 1690 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1691 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1692 il = jl + am; 1693 cols = il + am; 1694 ui_ptr = (PetscInt**)(cols + am); 1695 for (i=0; i<am; i++){ 1696 jl[i] = am; il[i] = 0; 1697 } 1698 1699 /* create and initialize a linked list for storing column indices of the active row k */ 1700 nlnk = am + 1; 1701 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1702 1703 /* initial FreeSpace size is fill*(ai[am]+1) */ 1704 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1705 current_space = free_space; 1706 1707 for (k=0; k<am; k++){ /* for each active row k */ 1708 /* initialize lnk by the column indices of row rip[k] of A */ 1709 nzk = 0; 1710 ncols = ai[rip[k]+1] - ai[rip[k]]; 1711 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1712 ncols_upper = 0; 1713 for (j=0; j<ncols; j++){ 1714 i = riip[*(aj + ai[rip[k]] + j)]; 1715 if (i >= k){ /* only take upper triangular entry */ 1716 cols[ncols_upper] = i; 1717 ncols_upper++; 1718 } 1719 } 1720 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1721 nzk += nlnk; 1722 1723 /* update lnk by computing fill-in for each pivot row to be merged in */ 1724 prow = jl[k]; /* 1st pivot row */ 1725 1726 while (prow < k){ 1727 nextprow = jl[prow]; 1728 /* merge prow into k-th row */ 1729 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1730 jmax = ui[prow+1]; 1731 ncols = jmax-jmin; 1732 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1733 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1734 nzk += nlnk; 1735 1736 /* update il and jl for prow */ 1737 if (jmin < jmax){ 1738 il[prow] = jmin; 1739 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1740 } 1741 prow = nextprow; 1742 } 1743 1744 /* if free space is not available, make more free space */ 1745 if (current_space->local_remaining<nzk) { 1746 i = am - k + 1; /* num of unfactored rows */ 1747 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1748 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1749 reallocs++; 1750 } 1751 1752 /* copy data into free space, then initialize lnk */ 1753 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1754 1755 /* add the k-th row into il and jl */ 1756 if (nzk-1 > 0){ 1757 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1758 jl[k] = jl[i]; jl[i] = k; 1759 il[k] = ui[k] + 1; 1760 } 1761 ui_ptr[k] = current_space->array; 1762 current_space->array += nzk; 1763 current_space->local_used += nzk; 1764 current_space->local_remaining -= nzk; 1765 1766 ui[k+1] = ui[k] + nzk; 1767 } 1768 1769 #if defined(PETSC_USE_INFO) 1770 if (ai[am] != 0) { 1771 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1772 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1773 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1774 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1775 } else { 1776 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1777 } 1778 #endif 1779 1780 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1781 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1782 ierr = PetscFree(jl);CHKERRQ(ierr); 1783 1784 /* destroy list of free space and other temporary array(s) */ 1785 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1786 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1787 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1788 1789 /* put together the new matrix in MATSEQSBAIJ format */ 1790 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1791 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1792 B = *fact; 1793 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1794 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1795 1796 b = (Mat_SeqSBAIJ*)B->data; 1797 b->singlemalloc = PETSC_FALSE; 1798 b->free_a = PETSC_TRUE; 1799 b->free_ij = PETSC_TRUE; 1800 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1801 b->j = uj; 1802 b->i = ui; 1803 b->diag = 0; 1804 b->ilen = 0; 1805 b->imax = 0; 1806 b->row = perm; 1807 b->col = perm; 1808 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1809 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1810 b->icol = iperm; 1811 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1812 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1813 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1814 b->maxnz = b->nz = ui[am]; 1815 1816 B->factor = FACTOR_CHOLESKY; 1817 B->info.factor_mallocs = reallocs; 1818 B->info.fill_ratio_given = fill; 1819 if (ai[am] != 0) { 1820 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1821 } else { 1822 B->info.fill_ratio_needed = 0.0; 1823 } 1824 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1825 if (perm_identity){ 1826 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1827 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1828 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1829 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1830 } else { 1831 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1832 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1833 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1834 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1835 } 1836 PetscFunctionReturn(0); 1837 } 1838