1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->sorted = PETSC_FALSE; 233 b->singlemalloc = PETSC_FALSE; 234 b->a = new_a; 235 b->j = new_j; 236 b->i = new_i; 237 b->ilen = 0; 238 b->imax = 0; 239 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 240 b->row = isirow; 241 b->col = iscolf; 242 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 243 b->maxnz = b->nz = new_i[n]; 244 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 245 (*fact)->info.factor_mallocs = 0; 246 247 af = ((double)b->nz)/((double)a->nz) + .001; 248 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 250 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 251 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 252 253 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 254 255 PetscFunctionReturn(0); 256 #endif 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 261 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 262 { 263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 264 IS isicol; 265 PetscErrorCode ierr; 266 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 267 PetscInt *bi,*bj,*ajtmp; 268 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 269 PetscReal f; 270 PetscInt nlnk,*lnk,k,**bi_ptr; 271 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 272 PetscBT lnkbt; 273 274 PetscFunctionBegin; 275 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 276 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 277 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 278 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 279 280 /* get new row pointers */ 281 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 282 bi[0] = 0; 283 284 /* bdiag is location of diagonal in factor */ 285 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 286 bdiag[0] = 0; 287 288 /* linked list for storing column indices of the active row */ 289 nlnk = n + 1; 290 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 291 292 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 293 294 /* initial FreeSpace size is f*(ai[n]+1) */ 295 f = info->fill; 296 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 297 current_space = free_space; 298 299 for (i=0; i<n; i++) { 300 /* copy previous fill into linked list */ 301 nzi = 0; 302 nnz = ai[r[i]+1] - ai[r[i]]; 303 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 304 ajtmp = aj + ai[r[i]]; 305 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 306 nzi += nlnk; 307 308 /* add pivot rows into linked list */ 309 row = lnk[n]; 310 while (row < i) { 311 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 312 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 313 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 314 nzi += nlnk; 315 row = lnk[row]; 316 } 317 bi[i+1] = bi[i] + nzi; 318 im[i] = nzi; 319 320 /* mark bdiag */ 321 nzbd = 0; 322 nnz = nzi; 323 k = lnk[n]; 324 while (nnz-- && k < i){ 325 nzbd++; 326 k = lnk[k]; 327 } 328 bdiag[i] = bi[i] + nzbd; 329 330 /* if free space is not available, make more free space */ 331 if (current_space->local_remaining<nzi) { 332 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 333 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 334 reallocs++; 335 } 336 337 /* copy data into free space, then initialize lnk */ 338 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 339 bi_ptr[i] = current_space->array; 340 current_space->array += nzi; 341 current_space->local_used += nzi; 342 current_space->local_remaining -= nzi; 343 } 344 #if defined(PETSC_USE_INFO) 345 if (ai[n] != 0) { 346 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 347 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 349 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 350 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 351 } else { 352 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 353 } 354 #endif 355 356 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 357 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 358 359 /* destroy list of free space and other temporary array(s) */ 360 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 361 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 362 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 363 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 364 365 /* put together the new matrix */ 366 ierr = MatCreate(A->comm,B);CHKERRQ(ierr); 367 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 368 ierr = MatSetType(*B,A->type_name);CHKERRQ(ierr); 369 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 370 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 371 b = (Mat_SeqAIJ*)(*B)->data; 372 b->free_a = PETSC_TRUE; 373 b->free_ij = PETSC_TRUE; 374 b->singlemalloc = PETSC_FALSE; 375 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 376 b->j = bj; 377 b->i = bi; 378 b->diag = bdiag; 379 b->ilen = 0; 380 b->imax = 0; 381 b->row = isrow; 382 b->col = iscol; 383 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 384 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 385 b->icol = isicol; 386 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 387 388 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 389 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 390 b->maxnz = b->nz = bi[n] ; 391 392 (*B)->factor = FACTOR_LU; 393 (*B)->info.factor_mallocs = reallocs; 394 (*B)->info.fill_ratio_given = f; 395 396 if (ai[n] != 0) { 397 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 398 } else { 399 (*B)->info.fill_ratio_needed = 0.0; 400 } 401 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 402 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 403 PetscFunctionReturn(0); 404 } 405 406 /* 407 Trouble in factorization, should we dump the original matrix? 408 */ 409 #undef __FUNCT__ 410 #define __FUNCT__ "MatFactorDumpMatrix" 411 PetscErrorCode MatFactorDumpMatrix(Mat A) 412 { 413 PetscErrorCode ierr; 414 PetscTruth flg; 415 416 PetscFunctionBegin; 417 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 418 if (flg) { 419 PetscViewer viewer; 420 char filename[PETSC_MAX_PATH_LEN]; 421 422 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 423 ierr = PetscViewerBinaryOpen(A->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 424 ierr = MatView(A,viewer);CHKERRQ(ierr); 425 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 426 } 427 PetscFunctionReturn(0); 428 } 429 430 /* ----------------------------------------------------------- */ 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 433 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 434 { 435 Mat C=*B; 436 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 437 IS isrow = b->row,isicol = b->icol; 438 PetscErrorCode ierr; 439 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 440 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 441 PetscInt *diag_offset = b->diag,diag,*pj; 442 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 /* 569 This routine implements inplace ILU(0) with row or/and column permutations. 570 Input: 571 A - original matrix 572 Output; 573 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 574 a->j (col index) is permuted by the inverse of colperm, then sorted 575 a->a reordered accordingly with a->j 576 a->diag (ptr to diagonal elements) is updated. 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 581 { 582 Mat C=*B; 583 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 584 IS isrow = b->row,isicol = b->icol; 585 PetscErrorCode ierr; 586 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 587 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 588 PetscInt *diag_offset = b->diag,diag,*pj; 589 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 590 PetscScalar d; 591 PetscReal rs; 592 LUShift_Ctx sctx; 593 PetscInt newshift,*ddiag; 594 595 PetscFunctionBegin; 596 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 597 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 598 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 599 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 600 rtmps = rtmp; ics = ic; 601 602 sctx.shift_top = 0; 603 sctx.nshift_max = 0; 604 sctx.shift_lo = 0; 605 sctx.shift_hi = 0; 606 607 /* if both shift schemes are chosen by user, only use info->shiftpd */ 608 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 609 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 610 PetscInt *aai = a->i; 611 ddiag = a->diag; 612 sctx.shift_top = 0; 613 for (i=0; i<n; i++) { 614 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 615 d = (a->a)[ddiag[i]]; 616 rs = -PetscAbsScalar(d) - PetscRealPart(d); 617 v = a->a+aai[i]; 618 nz = aai[i+1] - aai[i]; 619 for (j=0; j<nz; j++) 620 rs += PetscAbsScalar(v[j]); 621 if (rs>sctx.shift_top) sctx.shift_top = rs; 622 } 623 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 624 sctx.shift_top *= 1.1; 625 sctx.nshift_max = 5; 626 sctx.shift_lo = 0.; 627 sctx.shift_hi = 1.; 628 } 629 630 sctx.shift_amount = 0; 631 sctx.nshift = 0; 632 do { 633 sctx.lushift = PETSC_FALSE; 634 for (i=0; i<n; i++){ 635 nz = bi[r[i]+1] - bi[r[i]]; 636 bjtmp = bj + bi[r[i]]; 637 for (j=0; j<nz; j++) rtmps[ics[bjtmp[j]]] = 0.0; /* intializing sorted column */ 638 639 /* load in initial (unfactored row) */ 640 nz = a->i[r[i]+1] - a->i[r[i]]; 641 ajtmp = a->j + a->i[r[i]]; 642 v = a->a + a->i[r[i]]; 643 /* sort permuted ajtmp and values v accordingly */ 644 for (j=0; j<nz; j++) { 645 ajtmp[j] = ics[ajtmp[j]]; 646 } 647 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 648 649 diag_offset[r[i]] = bi[r[i]]; 650 for (j=0; j<nz; j++) { 651 rtmp[ajtmp[j]] = v[j]; 652 if (ajtmp[j] < i) diag_offset[r[i]]++; /* update diag_offset */ 653 } 654 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 655 656 row = *bjtmp++; 657 while (row < i) { 658 pc = rtmp + row; 659 if (*pc != 0.0) { 660 pv = b->a + diag_offset[r[row]]; 661 pj = b->j + diag_offset[r[row]] + 1; 662 663 multiplier = *pc / *pv++; 664 *pc = multiplier; 665 nz = bi[r[row]+1] - diag_offset[r[row]] - 1; 666 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 667 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 668 } 669 row = *bjtmp++; 670 } 671 /* finished row so stick it into b->a */ 672 pv = b->a + bi[r[i]] ; 673 pj = b->j + bi[r[i]] ; 674 nz = bi[r[i]+1] - bi[r[i]]; 675 diag = diag_offset[r[i]] - bi[r[i]]; 676 677 rs = 0.0; 678 for (j=0; j<nz; j++) { 679 pv[j] = rtmps[pj[j]]; 680 if (j != diag) rs += PetscAbsScalar(pv[j]); 681 } 682 683 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 684 sctx.rs = rs; 685 sctx.pv = pv[diag]; 686 ierr = MatLUCheckShift_inline(info,sctx,i,a->diag,newshift);CHKERRQ(ierr); 687 if (newshift == 1) break; 688 } 689 690 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 691 /* 692 * if no shift in this attempt & shifting & started shifting & can refine, 693 * then try lower shift 694 */ 695 sctx.shift_hi = info->shift_fraction; 696 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 697 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 698 sctx.lushift = PETSC_TRUE; 699 sctx.nshift++; 700 } 701 } while (sctx.lushift); 702 703 /* invert diagonal entries for simplier triangular solves */ 704 for (i=0; i<n; i++) { 705 b->a[diag_offset[r[i]]] = 1.0/b->a[diag_offset[r[i]]]; 706 } 707 708 ierr = PetscFree(rtmp);CHKERRQ(ierr); 709 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 710 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 711 C->factor = FACTOR_LU; 712 (*B)->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 713 C->assembled = PETSC_TRUE; 714 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 715 if (sctx.nshift){ 716 if (info->shiftnz) { 717 ierr = PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 718 } else if (info->shiftpd) { 719 ierr = PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 720 } 721 } 722 PetscFunctionReturn(0); 723 } 724 725 #undef __FUNCT__ 726 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 727 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 728 { 729 PetscFunctionBegin; 730 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 731 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 732 PetscFunctionReturn(0); 733 } 734 735 736 /* ----------------------------------------------------------- */ 737 #undef __FUNCT__ 738 #define __FUNCT__ "MatLUFactor_SeqAIJ" 739 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 740 { 741 PetscErrorCode ierr; 742 Mat C; 743 744 PetscFunctionBegin; 745 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 746 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 747 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 748 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 749 PetscFunctionReturn(0); 750 } 751 /* ----------------------------------------------------------- */ 752 #undef __FUNCT__ 753 #define __FUNCT__ "MatSolve_SeqAIJ" 754 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 755 { 756 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 757 IS iscol = a->col,isrow = a->row; 758 PetscErrorCode ierr; 759 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 760 PetscInt nz,*rout,*cout; 761 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 762 763 PetscFunctionBegin; 764 if (!n) PetscFunctionReturn(0); 765 766 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 767 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 768 tmp = a->solve_work; 769 770 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 771 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 772 773 /* forward solve the lower triangular */ 774 tmp[0] = b[*r++]; 775 tmps = tmp; 776 for (i=1; i<n; i++) { 777 v = aa + ai[i] ; 778 vi = aj + ai[i] ; 779 nz = a->diag[i] - ai[i]; 780 sum = b[*r++]; 781 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 782 tmp[i] = sum; 783 } 784 785 /* backward solve the upper triangular */ 786 for (i=n-1; i>=0; i--){ 787 v = aa + a->diag[i] + 1; 788 vi = aj + a->diag[i] + 1; 789 nz = ai[i+1] - a->diag[i] - 1; 790 sum = tmp[i]; 791 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 792 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 793 } 794 795 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 796 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 797 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 798 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 799 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 800 PetscFunctionReturn(0); 801 } 802 803 #undef __FUNCT__ 804 #define __FUNCT__ "MatMatSolve_SeqAIJ" 805 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 806 { 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 808 IS iscol = a->col,isrow = a->row; 809 PetscErrorCode ierr; 810 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 811 PetscInt nz,*rout,*cout,neq; 812 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 813 814 PetscFunctionBegin; 815 if (!n) PetscFunctionReturn(0); 816 817 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 818 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 819 820 tmp = a->solve_work; 821 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 822 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 823 824 for (neq=0; neq<n; neq++){ 825 /* forward solve the lower triangular */ 826 tmp[0] = b[r[0]]; 827 tmps = tmp; 828 for (i=1; i<n; i++) { 829 v = aa + ai[i] ; 830 vi = aj + ai[i] ; 831 nz = a->diag[i] - ai[i]; 832 sum = b[r[i]]; 833 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 834 tmp[i] = sum; 835 } 836 /* backward solve the upper triangular */ 837 for (i=n-1; i>=0; i--){ 838 v = aa + a->diag[i] + 1; 839 vi = aj + a->diag[i] + 1; 840 nz = ai[i+1] - a->diag[i] - 1; 841 sum = tmp[i]; 842 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 843 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 844 } 845 846 b += n; 847 x += n; 848 } 849 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 850 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 851 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 852 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 853 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 859 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 860 { 861 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 862 IS iscol = a->col,isrow = a->row; 863 PetscErrorCode ierr; 864 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 865 PetscInt nz,*rout,*cout,row; 866 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 867 868 PetscFunctionBegin; 869 if (!n) PetscFunctionReturn(0); 870 871 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 872 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 873 tmp = a->solve_work; 874 875 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 876 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 877 878 /* forward solve the lower triangular */ 879 tmp[0] = b[*r++]; 880 tmps = tmp; 881 for (row=1; row<n; row++) { 882 i = rout[row]; /* permuted row */ 883 v = aa + ai[i] ; 884 vi = aj + ai[i] ; 885 nz = a->diag[i] - ai[i]; 886 sum = b[*r++]; 887 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 888 tmp[row] = sum; 889 } 890 891 /* backward solve the upper triangular */ 892 for (row=n-1; row>=0; row--){ 893 i = rout[row]; /* permuted row */ 894 v = aa + a->diag[i] + 1; 895 vi = aj + a->diag[i] + 1; 896 nz = ai[i+1] - a->diag[i] - 1; 897 sum = tmp[row]; 898 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 899 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 900 } 901 902 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 903 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 904 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 905 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 906 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 /* ----------------------------------------------------------- */ 911 #undef __FUNCT__ 912 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 913 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 914 { 915 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 916 PetscErrorCode ierr; 917 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 918 PetscScalar *x,*b,*aa = a->a; 919 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 920 PetscInt adiag_i,i,*vi,nz,ai_i; 921 PetscScalar *v,sum; 922 #endif 923 924 PetscFunctionBegin; 925 if (!n) PetscFunctionReturn(0); 926 927 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 928 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 929 930 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 931 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 932 #else 933 /* forward solve the lower triangular */ 934 x[0] = b[0]; 935 for (i=1; i<n; i++) { 936 ai_i = ai[i]; 937 v = aa + ai_i; 938 vi = aj + ai_i; 939 nz = adiag[i] - ai_i; 940 sum = b[i]; 941 while (nz--) sum -= *v++ * x[*vi++]; 942 x[i] = sum; 943 } 944 945 /* backward solve the upper triangular */ 946 for (i=n-1; i>=0; i--){ 947 adiag_i = adiag[i]; 948 v = aa + adiag_i + 1; 949 vi = aj + adiag_i + 1; 950 nz = ai[i+1] - adiag_i - 1; 951 sum = x[i]; 952 while (nz--) sum -= *v++ * x[*vi++]; 953 x[i] = sum*aa[adiag_i]; 954 } 955 #endif 956 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 957 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 958 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 959 PetscFunctionReturn(0); 960 } 961 962 #undef __FUNCT__ 963 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 964 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 965 { 966 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 967 IS iscol = a->col,isrow = a->row; 968 PetscErrorCode ierr; 969 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 970 PetscInt nz,*rout,*cout; 971 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 972 973 PetscFunctionBegin; 974 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 975 976 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 978 tmp = a->solve_work; 979 980 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 981 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 982 983 /* forward solve the lower triangular */ 984 tmp[0] = b[*r++]; 985 for (i=1; i<n; i++) { 986 v = aa + ai[i] ; 987 vi = aj + ai[i] ; 988 nz = a->diag[i] - ai[i]; 989 sum = b[*r++]; 990 while (nz--) sum -= *v++ * tmp[*vi++ ]; 991 tmp[i] = sum; 992 } 993 994 /* backward solve the upper triangular */ 995 for (i=n-1; i>=0; i--){ 996 v = aa + a->diag[i] + 1; 997 vi = aj + a->diag[i] + 1; 998 nz = ai[i+1] - a->diag[i] - 1; 999 sum = tmp[i]; 1000 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1001 tmp[i] = sum*aa[a->diag[i]]; 1002 x[*c--] += tmp[i]; 1003 } 1004 1005 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1006 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1009 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1010 1011 PetscFunctionReturn(0); 1012 } 1013 /* -------------------------------------------------------------------*/ 1014 #undef __FUNCT__ 1015 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1016 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1017 { 1018 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1019 IS iscol = a->col,isrow = a->row; 1020 PetscErrorCode ierr; 1021 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1022 PetscInt nz,*rout,*cout,*diag = a->diag; 1023 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1024 1025 PetscFunctionBegin; 1026 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1027 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1028 tmp = a->solve_work; 1029 1030 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1031 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1032 1033 /* copy the b into temp work space according to permutation */ 1034 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1035 1036 /* forward solve the U^T */ 1037 for (i=0; i<n; i++) { 1038 v = aa + diag[i] ; 1039 vi = aj + diag[i] + 1; 1040 nz = ai[i+1] - diag[i] - 1; 1041 s1 = tmp[i]; 1042 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1043 while (nz--) { 1044 tmp[*vi++ ] -= (*v++)*s1; 1045 } 1046 tmp[i] = s1; 1047 } 1048 1049 /* backward solve the L^T */ 1050 for (i=n-1; i>=0; i--){ 1051 v = aa + diag[i] - 1 ; 1052 vi = aj + diag[i] - 1 ; 1053 nz = diag[i] - ai[i]; 1054 s1 = tmp[i]; 1055 while (nz--) { 1056 tmp[*vi-- ] -= (*v--)*s1; 1057 } 1058 } 1059 1060 /* copy tmp into x according to permutation */ 1061 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1062 1063 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1064 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1065 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1066 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1067 1068 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1069 PetscFunctionReturn(0); 1070 } 1071 1072 #undef __FUNCT__ 1073 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1074 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1075 { 1076 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1077 IS iscol = a->col,isrow = a->row; 1078 PetscErrorCode ierr; 1079 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1080 PetscInt nz,*rout,*cout,*diag = a->diag; 1081 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1082 1083 PetscFunctionBegin; 1084 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1085 1086 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1087 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1088 tmp = a->solve_work; 1089 1090 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1091 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1092 1093 /* copy the b into temp work space according to permutation */ 1094 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1095 1096 /* forward solve the U^T */ 1097 for (i=0; i<n; i++) { 1098 v = aa + diag[i] ; 1099 vi = aj + diag[i] + 1; 1100 nz = ai[i+1] - diag[i] - 1; 1101 tmp[i] *= *v++; 1102 while (nz--) { 1103 tmp[*vi++ ] -= (*v++)*tmp[i]; 1104 } 1105 } 1106 1107 /* backward solve the L^T */ 1108 for (i=n-1; i>=0; i--){ 1109 v = aa + diag[i] - 1 ; 1110 vi = aj + diag[i] - 1 ; 1111 nz = diag[i] - ai[i]; 1112 while (nz--) { 1113 tmp[*vi-- ] -= (*v--)*tmp[i]; 1114 } 1115 } 1116 1117 /* copy tmp into x according to permutation */ 1118 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1119 1120 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1121 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1122 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1123 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1124 1125 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1126 PetscFunctionReturn(0); 1127 } 1128 /* ----------------------------------------------------------------*/ 1129 EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat,PetscTruth*,PetscInt*); 1130 1131 #undef __FUNCT__ 1132 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1133 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1134 { 1135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1136 IS isicol; 1137 PetscErrorCode ierr; 1138 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1139 PetscInt *bi,*cols,nnz,*cols_lvl; 1140 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1141 PetscInt i,levels,diagonal_fill; 1142 PetscTruth col_identity,row_identity; 1143 PetscReal f; 1144 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1145 PetscBT lnkbt; 1146 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1147 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1148 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1149 PetscTruth missing; 1150 1151 PetscFunctionBegin; 1152 f = info->fill; 1153 levels = (PetscInt)info->levels; 1154 diagonal_fill = (PetscInt)info->diagonal_fill; 1155 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1156 1157 /* special case that simply copies fill pattern */ 1158 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1159 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1160 if (!levels && row_identity && col_identity) { 1161 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1162 (*fact)->factor = FACTOR_LU; 1163 (*fact)->info.factor_mallocs = 0; 1164 (*fact)->info.fill_ratio_given = info->fill; 1165 (*fact)->info.fill_ratio_needed = 1.0; 1166 b = (Mat_SeqAIJ*)(*fact)->data; 1167 ierr = MatMissingDiagonal_SeqAIJ(*fact,&missing,&d);CHKERRQ(ierr); 1168 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1169 b->row = isrow; 1170 b->col = iscol; 1171 b->icol = isicol; 1172 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1173 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1174 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1175 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1180 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1181 1182 /* get new row pointers */ 1183 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1184 bi[0] = 0; 1185 /* bdiag is location of diagonal in factor */ 1186 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1187 bdiag[0] = 0; 1188 1189 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1190 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1191 1192 /* create a linked list for storing column indices of the active row */ 1193 nlnk = n + 1; 1194 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1195 1196 /* initial FreeSpace size is f*(ai[n]+1) */ 1197 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1198 current_space = free_space; 1199 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1200 current_space_lvl = free_space_lvl; 1201 1202 for (i=0; i<n; i++) { 1203 nzi = 0; 1204 /* copy current row into linked list */ 1205 nnz = ai[r[i]+1] - ai[r[i]]; 1206 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1207 cols = aj + ai[r[i]]; 1208 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1209 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1210 nzi += nlnk; 1211 1212 /* make sure diagonal entry is included */ 1213 if (diagonal_fill && lnk[i] == -1) { 1214 fm = n; 1215 while (lnk[fm] < i) fm = lnk[fm]; 1216 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1217 lnk[fm] = i; 1218 lnk_lvl[i] = 0; 1219 nzi++; dcount++; 1220 } 1221 1222 /* add pivot rows into the active row */ 1223 nzbd = 0; 1224 prow = lnk[n]; 1225 while (prow < i) { 1226 nnz = bdiag[prow]; 1227 cols = bj_ptr[prow] + nnz + 1; 1228 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1229 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1230 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1231 nzi += nlnk; 1232 prow = lnk[prow]; 1233 nzbd++; 1234 } 1235 bdiag[i] = nzbd; 1236 bi[i+1] = bi[i] + nzi; 1237 1238 /* if free space is not available, make more free space */ 1239 if (current_space->local_remaining<nzi) { 1240 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1241 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1242 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1243 reallocs++; 1244 } 1245 1246 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1247 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1248 bj_ptr[i] = current_space->array; 1249 bjlvl_ptr[i] = current_space_lvl->array; 1250 1251 /* make sure the active row i has diagonal entry */ 1252 if (*(bj_ptr[i]+bdiag[i]) != i) { 1253 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1254 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1255 } 1256 1257 current_space->array += nzi; 1258 current_space->local_used += nzi; 1259 current_space->local_remaining -= nzi; 1260 current_space_lvl->array += nzi; 1261 current_space_lvl->local_used += nzi; 1262 current_space_lvl->local_remaining -= nzi; 1263 } 1264 1265 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1266 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1267 1268 /* destroy list of free space and other temporary arrays */ 1269 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1270 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1271 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1272 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1273 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1274 1275 #if defined(PETSC_USE_INFO) 1276 { 1277 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1278 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1279 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1280 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1281 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1282 if (diagonal_fill) { 1283 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1284 } 1285 } 1286 #endif 1287 1288 /* put together the new matrix */ 1289 ierr = MatCreate(A->comm,fact);CHKERRQ(ierr); 1290 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1291 ierr = MatSetType(*fact,A->type_name);CHKERRQ(ierr); 1292 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1293 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1294 b = (Mat_SeqAIJ*)(*fact)->data; 1295 b->free_a = PETSC_TRUE; 1296 b->free_ij = PETSC_TRUE; 1297 b->singlemalloc = PETSC_FALSE; 1298 len = (bi[n] )*sizeof(PetscScalar); 1299 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1300 b->j = bj; 1301 b->i = bi; 1302 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1303 b->diag = bdiag; 1304 b->ilen = 0; 1305 b->imax = 0; 1306 b->row = isrow; 1307 b->col = iscol; 1308 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1309 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1310 b->icol = isicol; 1311 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1312 /* In b structure: Free imax, ilen, old a, old j. 1313 Allocate bdiag, solve_work, new a, new j */ 1314 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1315 b->maxnz = b->nz = bi[n] ; 1316 (*fact)->factor = FACTOR_LU; 1317 (*fact)->info.factor_mallocs = reallocs; 1318 (*fact)->info.fill_ratio_given = f; 1319 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1320 1321 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1322 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1323 1324 PetscFunctionReturn(0); 1325 } 1326 1327 #include "src/mat/impls/sbaij/seq/sbaij.h" 1328 #undef __FUNCT__ 1329 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1330 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1331 { 1332 Mat C = *B; 1333 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1334 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1335 IS ip=b->row,iip = b->icol; 1336 PetscErrorCode ierr; 1337 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1338 PetscInt *ai=a->i,*aj=a->j; 1339 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1340 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1341 PetscReal zeropivot,rs,shiftnz; 1342 PetscReal shiftpd; 1343 ChShift_Ctx sctx; 1344 PetscInt newshift; 1345 1346 PetscFunctionBegin; 1347 shiftnz = info->shiftnz; 1348 shiftpd = info->shiftpd; 1349 zeropivot = info->zeropivot; 1350 1351 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1352 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1353 1354 /* initialization */ 1355 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1356 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1357 jl = il + mbs; 1358 rtmp = (MatScalar*)(jl + mbs); 1359 1360 sctx.shift_amount = 0; 1361 sctx.nshift = 0; 1362 do { 1363 sctx.chshift = PETSC_FALSE; 1364 for (i=0; i<mbs; i++) { 1365 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1366 } 1367 1368 for (k = 0; k<mbs; k++){ 1369 bval = ba + bi[k]; 1370 /* initialize k-th row by the perm[k]-th row of A */ 1371 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1372 for (j = jmin; j < jmax; j++){ 1373 col = riip[aj[j]]; 1374 if (col >= k){ /* only take upper triangular entry */ 1375 rtmp[col] = aa[j]; 1376 *bval++ = 0.0; /* for in-place factorization */ 1377 } 1378 } 1379 /* shift the diagonal of the matrix */ 1380 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1381 1382 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1383 dk = rtmp[k]; 1384 i = jl[k]; /* first row to be added to k_th row */ 1385 1386 while (i < k){ 1387 nexti = jl[i]; /* next row to be added to k_th row */ 1388 1389 /* compute multiplier, update diag(k) and U(i,k) */ 1390 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1391 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1392 dk += uikdi*ba[ili]; 1393 ba[ili] = uikdi; /* -U(i,k) */ 1394 1395 /* add multiple of row i to k-th row */ 1396 jmin = ili + 1; jmax = bi[i+1]; 1397 if (jmin < jmax){ 1398 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1399 /* update il and jl for row i */ 1400 il[i] = jmin; 1401 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1402 } 1403 i = nexti; 1404 } 1405 1406 /* shift the diagonals when zero pivot is detected */ 1407 /* compute rs=sum of abs(off-diagonal) */ 1408 rs = 0.0; 1409 jmin = bi[k]+1; 1410 nz = bi[k+1] - jmin; 1411 if (nz){ 1412 bcol = bj + jmin; 1413 while (nz--){ 1414 rs += PetscAbsScalar(rtmp[*bcol]); 1415 bcol++; 1416 } 1417 } 1418 1419 sctx.rs = rs; 1420 sctx.pv = dk; 1421 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1422 if (newshift == 1) break; 1423 1424 /* copy data into U(k,:) */ 1425 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1426 jmin = bi[k]+1; jmax = bi[k+1]; 1427 if (jmin < jmax) { 1428 for (j=jmin; j<jmax; j++){ 1429 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1430 } 1431 /* add the k-th row into il and jl */ 1432 il[k] = jmin; 1433 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1434 } 1435 } 1436 } while (sctx.chshift); 1437 ierr = PetscFree(il);CHKERRQ(ierr); 1438 1439 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1440 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1441 C->factor = FACTOR_CHOLESKY; 1442 C->assembled = PETSC_TRUE; 1443 C->preallocated = PETSC_TRUE; 1444 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1445 if (sctx.nshift){ 1446 if (shiftnz) { 1447 ierr = PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1448 } else if (shiftpd) { 1449 ierr = PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1450 } 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 #undef __FUNCT__ 1456 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1457 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1458 { 1459 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1460 Mat_SeqSBAIJ *b; 1461 Mat B; 1462 PetscErrorCode ierr; 1463 PetscTruth perm_identity; 1464 PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1465 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1466 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1467 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1468 PetscReal fill=info->fill,levels=info->levels; 1469 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1470 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1471 PetscBT lnkbt; 1472 1473 PetscFunctionBegin; 1474 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1475 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1476 1477 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1478 ui[0] = 0; 1479 1480 /* special case that simply copies fill pattern */ 1481 if (!levels && perm_identity) { 1482 for (i=0; i<am; i++) { 1483 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1484 } 1485 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1486 cols = uj; 1487 for (i=0; i<am; i++) { 1488 aj = a->j + a->diag[i]; 1489 ncols = ui[i+1] - ui[i]; 1490 for (j=0; j<ncols; j++) *cols++ = *aj++; 1491 } 1492 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1493 /* initialization */ 1494 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1495 1496 /* jl: linked list for storing indices of the pivot rows 1497 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1498 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1499 il = jl + am; 1500 uj_ptr = (PetscInt**)(il + am); 1501 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1502 for (i=0; i<am; i++){ 1503 jl[i] = am; il[i] = 0; 1504 } 1505 1506 /* create and initialize a linked list for storing column indices of the active row k */ 1507 nlnk = am + 1; 1508 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1509 1510 /* initial FreeSpace size is fill*(ai[am]+1) */ 1511 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1512 current_space = free_space; 1513 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1514 current_space_lvl = free_space_lvl; 1515 1516 for (k=0; k<am; k++){ /* for each active row k */ 1517 /* initialize lnk by the column indices of row rip[k] of A */ 1518 nzk = 0; 1519 ncols = ai[rip[k]+1] - ai[rip[k]]; 1520 ncols_upper = 0; 1521 for (j=0; j<ncols; j++){ 1522 i = *(aj + ai[rip[k]] + j); 1523 if (rip[i] >= k){ /* only take upper triangular entry */ 1524 ajtmp[ncols_upper] = i; 1525 ncols_upper++; 1526 } 1527 } 1528 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1529 nzk += nlnk; 1530 1531 /* update lnk by computing fill-in for each pivot row to be merged in */ 1532 prow = jl[k]; /* 1st pivot row */ 1533 1534 while (prow < k){ 1535 nextprow = jl[prow]; 1536 1537 /* merge prow into k-th row */ 1538 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1539 jmax = ui[prow+1]; 1540 ncols = jmax-jmin; 1541 i = jmin - ui[prow]; 1542 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1543 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1544 j = *(uj - 1); 1545 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1546 nzk += nlnk; 1547 1548 /* update il and jl for prow */ 1549 if (jmin < jmax){ 1550 il[prow] = jmin; 1551 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1552 } 1553 prow = nextprow; 1554 } 1555 1556 /* if free space is not available, make more free space */ 1557 if (current_space->local_remaining<nzk) { 1558 i = am - k + 1; /* num of unfactored rows */ 1559 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1560 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1561 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1562 reallocs++; 1563 } 1564 1565 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1566 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1567 1568 /* add the k-th row into il and jl */ 1569 if (nzk > 1){ 1570 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1571 jl[k] = jl[i]; jl[i] = k; 1572 il[k] = ui[k] + 1; 1573 } 1574 uj_ptr[k] = current_space->array; 1575 uj_lvl_ptr[k] = current_space_lvl->array; 1576 1577 current_space->array += nzk; 1578 current_space->local_used += nzk; 1579 current_space->local_remaining -= nzk; 1580 1581 current_space_lvl->array += nzk; 1582 current_space_lvl->local_used += nzk; 1583 current_space_lvl->local_remaining -= nzk; 1584 1585 ui[k+1] = ui[k] + nzk; 1586 } 1587 1588 #if defined(PETSC_USE_INFO) 1589 if (ai[am] != 0) { 1590 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1591 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1592 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1593 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1594 } else { 1595 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1596 } 1597 #endif 1598 1599 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1600 ierr = PetscFree(jl);CHKERRQ(ierr); 1601 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1602 1603 /* destroy list of free space and other temporary array(s) */ 1604 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1605 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1606 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1607 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1608 1609 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1610 1611 /* put together the new matrix in MATSEQSBAIJ format */ 1612 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1613 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1614 B = *fact; 1615 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1616 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1617 1618 b = (Mat_SeqSBAIJ*)B->data; 1619 b->singlemalloc = PETSC_FALSE; 1620 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1621 b->j = uj; 1622 b->i = ui; 1623 b->diag = 0; 1624 b->ilen = 0; 1625 b->imax = 0; 1626 b->row = perm; 1627 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1628 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1629 b->icol = perm; 1630 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1631 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1632 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1633 b->maxnz = b->nz = ui[am]; 1634 b->free_a = PETSC_TRUE; 1635 b->free_ij = PETSC_TRUE; 1636 1637 B->factor = FACTOR_CHOLESKY; 1638 B->info.factor_mallocs = reallocs; 1639 B->info.fill_ratio_given = fill; 1640 if (ai[am] != 0) { 1641 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1642 } else { 1643 B->info.fill_ratio_needed = 0.0; 1644 } 1645 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1646 if (perm_identity){ 1647 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1648 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1649 } 1650 PetscFunctionReturn(0); 1651 } 1652 1653 #undef __FUNCT__ 1654 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1655 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1656 { 1657 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1658 Mat_SeqSBAIJ *b; 1659 Mat B; 1660 PetscErrorCode ierr; 1661 PetscTruth perm_identity; 1662 PetscReal fill = info->fill; 1663 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1664 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1665 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1666 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1667 PetscBT lnkbt; 1668 IS iperm; 1669 1670 PetscFunctionBegin; 1671 /* check whether perm is the identity mapping */ 1672 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1673 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1674 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1675 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1676 1677 /* initialization */ 1678 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1679 ui[0] = 0; 1680 1681 /* jl: linked list for storing indices of the pivot rows 1682 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1683 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1684 il = jl + am; 1685 cols = il + am; 1686 ui_ptr = (PetscInt**)(cols + am); 1687 for (i=0; i<am; i++){ 1688 jl[i] = am; il[i] = 0; 1689 } 1690 1691 /* create and initialize a linked list for storing column indices of the active row k */ 1692 nlnk = am + 1; 1693 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1694 1695 /* initial FreeSpace size is fill*(ai[am]+1) */ 1696 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1697 current_space = free_space; 1698 1699 for (k=0; k<am; k++){ /* for each active row k */ 1700 /* initialize lnk by the column indices of row rip[k] of A */ 1701 nzk = 0; 1702 ncols = ai[rip[k]+1] - ai[rip[k]]; 1703 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1704 ncols_upper = 0; 1705 for (j=0; j<ncols; j++){ 1706 i = riip[*(aj + ai[rip[k]] + j)]; 1707 if (i >= k){ /* only take upper triangular entry */ 1708 cols[ncols_upper] = i; 1709 ncols_upper++; 1710 } 1711 } 1712 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1713 nzk += nlnk; 1714 1715 /* update lnk by computing fill-in for each pivot row to be merged in */ 1716 prow = jl[k]; /* 1st pivot row */ 1717 1718 while (prow < k){ 1719 nextprow = jl[prow]; 1720 /* merge prow into k-th row */ 1721 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1722 jmax = ui[prow+1]; 1723 ncols = jmax-jmin; 1724 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1725 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1726 nzk += nlnk; 1727 1728 /* update il and jl for prow */ 1729 if (jmin < jmax){ 1730 il[prow] = jmin; 1731 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1732 } 1733 prow = nextprow; 1734 } 1735 1736 /* if free space is not available, make more free space */ 1737 if (current_space->local_remaining<nzk) { 1738 i = am - k + 1; /* num of unfactored rows */ 1739 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1740 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1741 reallocs++; 1742 } 1743 1744 /* copy data into free space, then initialize lnk */ 1745 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1746 1747 /* add the k-th row into il and jl */ 1748 if (nzk-1 > 0){ 1749 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1750 jl[k] = jl[i]; jl[i] = k; 1751 il[k] = ui[k] + 1; 1752 } 1753 ui_ptr[k] = current_space->array; 1754 current_space->array += nzk; 1755 current_space->local_used += nzk; 1756 current_space->local_remaining -= nzk; 1757 1758 ui[k+1] = ui[k] + nzk; 1759 } 1760 1761 #if defined(PETSC_USE_INFO) 1762 if (ai[am] != 0) { 1763 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1764 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1765 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1766 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1767 } else { 1768 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1769 } 1770 #endif 1771 1772 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1773 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1774 ierr = PetscFree(jl);CHKERRQ(ierr); 1775 1776 /* destroy list of free space and other temporary array(s) */ 1777 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1778 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1779 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1780 1781 /* put together the new matrix in MATSEQSBAIJ format */ 1782 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1783 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1784 B = *fact; 1785 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1786 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1787 1788 b = (Mat_SeqSBAIJ*)B->data; 1789 b->singlemalloc = PETSC_FALSE; 1790 b->free_a = PETSC_TRUE; 1791 b->free_ij = PETSC_TRUE; 1792 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1793 b->j = uj; 1794 b->i = ui; 1795 b->diag = 0; 1796 b->ilen = 0; 1797 b->imax = 0; 1798 b->row = perm; 1799 b->col = perm; 1800 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1801 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1802 b->icol = iperm; 1803 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1804 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1805 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1806 b->maxnz = b->nz = ui[am]; 1807 1808 B->factor = FACTOR_CHOLESKY; 1809 B->info.factor_mallocs = reallocs; 1810 B->info.fill_ratio_given = fill; 1811 if (ai[am] != 0) { 1812 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1813 } else { 1814 B->info.fill_ratio_needed = 0.0; 1815 } 1816 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1817 if (perm_identity){ 1818 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1819 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823