1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 MatScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*pc,multiplier,*rtmps; 442 MatScalar *v,*pv; 443 PetscScalar d; 444 PetscReal rs; 445 LUShift_Ctx sctx; 446 PetscInt newshift,*ddiag; 447 448 PetscFunctionBegin; 449 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 450 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 451 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 452 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 453 rtmps = rtmp; ics = ic; 454 455 sctx.shift_top = 0; 456 sctx.nshift_max = 0; 457 sctx.shift_lo = 0; 458 sctx.shift_hi = 0; 459 460 /* if both shift schemes are chosen by user, only use info->shiftpd */ 461 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 462 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 463 PetscInt *aai = a->i; 464 ddiag = a->diag; 465 sctx.shift_top = 0; 466 for (i=0; i<n; i++) { 467 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 468 d = (a->a)[ddiag[i]]; 469 rs = -PetscAbsScalar(d) - PetscRealPart(d); 470 v = a->a+aai[i]; 471 nz = aai[i+1] - aai[i]; 472 for (j=0; j<nz; j++) 473 rs += PetscAbsScalar(v[j]); 474 if (rs>sctx.shift_top) sctx.shift_top = rs; 475 } 476 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 477 sctx.shift_top *= 1.1; 478 sctx.nshift_max = 5; 479 sctx.shift_lo = 0.; 480 sctx.shift_hi = 1.; 481 } 482 483 sctx.shift_amount = 0; 484 sctx.nshift = 0; 485 do { 486 sctx.lushift = PETSC_FALSE; 487 for (i=0; i<n; i++){ 488 nz = bi[i+1] - bi[i]; 489 bjtmp = bj + bi[i]; 490 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 491 492 /* load in initial (unfactored row) */ 493 nz = a->i[r[i]+1] - a->i[r[i]]; 494 ajtmp = a->j + a->i[r[i]]; 495 v = a->a + a->i[r[i]]; 496 for (j=0; j<nz; j++) { 497 rtmp[ics[ajtmp[j]]] = v[j]; 498 } 499 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 500 501 row = *bjtmp++; 502 while (row < i) { 503 pc = rtmp + row; 504 if (*pc != 0.0) { 505 pv = b->a + diag_offset[row]; 506 pj = b->j + diag_offset[row] + 1; 507 multiplier = *pc / *pv++; 508 *pc = multiplier; 509 nz = bi[row+1] - diag_offset[row] - 1; 510 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 511 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 512 } 513 row = *bjtmp++; 514 } 515 /* finished row so stick it into b->a */ 516 pv = b->a + bi[i] ; 517 pj = b->j + bi[i] ; 518 nz = bi[i+1] - bi[i]; 519 diag = diag_offset[i] - bi[i]; 520 rs = 0.0; 521 for (j=0; j<nz; j++) { 522 pv[j] = rtmps[pj[j]]; 523 if (j != diag) rs += PetscAbsScalar(pv[j]); 524 } 525 526 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 527 sctx.rs = rs; 528 sctx.pv = pv[diag]; 529 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 530 if (newshift == 1) break; 531 } 532 533 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 534 /* 535 * if no shift in this attempt & shifting & started shifting & can refine, 536 * then try lower shift 537 */ 538 sctx.shift_hi = info->shift_fraction; 539 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 540 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 541 sctx.lushift = PETSC_TRUE; 542 sctx.nshift++; 543 } 544 } while (sctx.lushift); 545 546 /* invert diagonal entries for simplier triangular solves */ 547 for (i=0; i<n; i++) { 548 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 549 } 550 551 ierr = PetscFree(rtmp);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 553 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 554 C->factor = FACTOR_LU; 555 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 556 C->assembled = PETSC_TRUE; 557 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 558 if (sctx.nshift){ 559 if (info->shiftnz) { 560 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 561 } else if (info->shiftpd) { 562 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 563 } 564 } 565 PetscFunctionReturn(0); 566 } 567 568 /* 569 This routine implements inplace ILU(0) with row or/and column permutations. 570 Input: 571 A - original matrix 572 Output; 573 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 574 a->j (col index) is permuted by the inverse of colperm, then sorted 575 a->a reordered accordingly with a->j 576 a->diag (ptr to diagonal elements) is updated. 577 */ 578 #undef __FUNCT__ 579 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 580 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 581 { 582 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 583 IS isrow = a->row,isicol = a->icol; 584 PetscErrorCode ierr; 585 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 586 PetscInt *ajtmp,nz,row,*ics; 587 PetscInt *diag = a->diag,nbdiag,*pj; 588 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 589 PetscReal rs; 590 LUShift_Ctx sctx; 591 PetscInt newshift; 592 593 PetscFunctionBegin; 594 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 595 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 596 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 597 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 598 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 599 ics = ic; 600 601 sctx.shift_top = 0; 602 sctx.nshift_max = 0; 603 sctx.shift_lo = 0; 604 sctx.shift_hi = 0; 605 606 /* if both shift schemes are chosen by user, only use info->shiftpd */ 607 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 608 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 609 sctx.shift_top = 0; 610 for (i=0; i<n; i++) { 611 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 612 d = (a->a)[diag[i]]; 613 rs = -PetscAbsScalar(d) - PetscRealPart(d); 614 v = a->a+ai[i]; 615 nz = ai[i+1] - ai[i]; 616 for (j=0; j<nz; j++) 617 rs += PetscAbsScalar(v[j]); 618 if (rs>sctx.shift_top) sctx.shift_top = rs; 619 } 620 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 621 sctx.shift_top *= 1.1; 622 sctx.nshift_max = 5; 623 sctx.shift_lo = 0.; 624 sctx.shift_hi = 1.; 625 } 626 627 sctx.shift_amount = 0; 628 sctx.nshift = 0; 629 do { 630 sctx.lushift = PETSC_FALSE; 631 for (i=0; i<n; i++){ 632 /* load in initial unfactored row */ 633 nz = ai[r[i]+1] - ai[r[i]]; 634 ajtmp = aj + ai[r[i]]; 635 v = a->a + ai[r[i]]; 636 /* sort permuted ajtmp and values v accordingly */ 637 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 638 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 639 640 diag[r[i]] = ai[r[i]]; 641 for (j=0; j<nz; j++) { 642 rtmp[ajtmp[j]] = v[j]; 643 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 644 } 645 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 646 647 row = *ajtmp++; 648 while (row < i) { 649 pc = rtmp + row; 650 if (*pc != 0.0) { 651 pv = a->a + diag[r[row]]; 652 pj = aj + diag[r[row]] + 1; 653 654 multiplier = *pc / *pv++; 655 *pc = multiplier; 656 nz = ai[r[row]+1] - diag[r[row]] - 1; 657 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 658 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 659 } 660 row = *ajtmp++; 661 } 662 /* finished row so overwrite it onto a->a */ 663 pv = a->a + ai[r[i]] ; 664 pj = aj + ai[r[i]] ; 665 nz = ai[r[i]+1] - ai[r[i]]; 666 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 667 668 rs = 0.0; 669 for (j=0; j<nz; j++) { 670 pv[j] = rtmp[pj[j]]; 671 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 672 } 673 674 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 675 sctx.rs = rs; 676 sctx.pv = pv[nbdiag]; 677 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 678 if (newshift == 1) break; 679 } 680 681 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 682 /* 683 * if no shift in this attempt & shifting & started shifting & can refine, 684 * then try lower shift 685 */ 686 sctx.shift_hi = info->shift_fraction; 687 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 688 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 689 sctx.lushift = PETSC_TRUE; 690 sctx.nshift++; 691 } 692 } while (sctx.lushift); 693 694 /* invert diagonal entries for simplier triangular solves */ 695 for (i=0; i<n; i++) { 696 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 697 } 698 699 ierr = PetscFree(rtmp);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 701 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 702 A->factor = FACTOR_LU; 703 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 704 A->assembled = PETSC_TRUE; 705 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 706 if (sctx.nshift){ 707 if (info->shiftnz) { 708 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 709 } else if (info->shiftpd) { 710 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 711 } 712 } 713 PetscFunctionReturn(0); 714 } 715 716 #undef __FUNCT__ 717 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 718 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 719 { 720 PetscFunctionBegin; 721 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 722 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 723 PetscFunctionReturn(0); 724 } 725 726 727 /* ----------------------------------------------------------- */ 728 #undef __FUNCT__ 729 #define __FUNCT__ "MatLUFactor_SeqAIJ" 730 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 731 { 732 PetscErrorCode ierr; 733 Mat C; 734 735 PetscFunctionBegin; 736 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 737 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 738 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 739 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 740 PetscFunctionReturn(0); 741 } 742 /* ----------------------------------------------------------- */ 743 #undef __FUNCT__ 744 #define __FUNCT__ "MatSolve_SeqAIJ" 745 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 746 { 747 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 748 IS iscol = a->col,isrow = a->row; 749 PetscErrorCode ierr; 750 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 751 PetscInt nz,*rout,*cout; 752 PetscScalar *x,*tmp,*tmps,*aa = a->a,sum,*v; 753 const PetscScalar *b; 754 755 PetscFunctionBegin; 756 if (!n) PetscFunctionReturn(0); 757 758 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 759 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 760 tmp = a->solve_work; 761 762 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 763 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 764 765 /* forward solve the lower triangular */ 766 tmp[0] = b[*r++]; 767 tmps = tmp; 768 for (i=1; i<n; i++) { 769 v = aa + ai[i] ; 770 vi = aj + ai[i] ; 771 nz = a->diag[i] - ai[i]; 772 sum = b[*r++]; 773 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 774 tmp[i] = sum; 775 } 776 777 /* backward solve the upper triangular */ 778 for (i=n-1; i>=0; i--){ 779 v = aa + a->diag[i] + 1; 780 vi = aj + a->diag[i] + 1; 781 nz = ai[i+1] - a->diag[i] - 1; 782 sum = tmp[i]; 783 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 784 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 785 } 786 787 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 788 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 789 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 790 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 791 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 792 PetscFunctionReturn(0); 793 } 794 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatMatSolve_SeqAIJ" 797 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 798 { 799 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 800 IS iscol = a->col,isrow = a->row; 801 PetscErrorCode ierr; 802 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 803 PetscInt nz,*rout,*cout,neq; 804 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 805 806 PetscFunctionBegin; 807 if (!n) PetscFunctionReturn(0); 808 809 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 810 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 811 812 tmp = a->solve_work; 813 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 814 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 815 816 for (neq=0; neq<n; neq++){ 817 /* forward solve the lower triangular */ 818 tmp[0] = b[r[0]]; 819 tmps = tmp; 820 for (i=1; i<n; i++) { 821 v = aa + ai[i] ; 822 vi = aj + ai[i] ; 823 nz = a->diag[i] - ai[i]; 824 sum = b[r[i]]; 825 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 826 tmp[i] = sum; 827 } 828 /* backward solve the upper triangular */ 829 for (i=n-1; i>=0; i--){ 830 v = aa + a->diag[i] + 1; 831 vi = aj + a->diag[i] + 1; 832 nz = ai[i+1] - a->diag[i] - 1; 833 sum = tmp[i]; 834 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 835 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 836 } 837 838 b += n; 839 x += n; 840 } 841 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 842 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 843 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 844 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 845 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 846 PetscFunctionReturn(0); 847 } 848 849 #undef __FUNCT__ 850 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 851 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 852 { 853 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 854 IS iscol = a->col,isrow = a->row; 855 PetscErrorCode ierr; 856 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 857 PetscInt nz,*rout,*cout,row; 858 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 859 860 PetscFunctionBegin; 861 if (!n) PetscFunctionReturn(0); 862 863 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 864 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 865 tmp = a->solve_work; 866 867 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 868 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 869 870 /* forward solve the lower triangular */ 871 tmp[0] = b[*r++]; 872 tmps = tmp; 873 for (row=1; row<n; row++) { 874 i = rout[row]; /* permuted row */ 875 v = aa + ai[i] ; 876 vi = aj + ai[i] ; 877 nz = a->diag[i] - ai[i]; 878 sum = b[*r++]; 879 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 880 tmp[row] = sum; 881 } 882 883 /* backward solve the upper triangular */ 884 for (row=n-1; row>=0; row--){ 885 i = rout[row]; /* permuted row */ 886 v = aa + a->diag[i] + 1; 887 vi = aj + a->diag[i] + 1; 888 nz = ai[i+1] - a->diag[i] - 1; 889 sum = tmp[row]; 890 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 891 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 892 } 893 894 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 895 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 896 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 897 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 898 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 899 PetscFunctionReturn(0); 900 } 901 902 /* ----------------------------------------------------------- */ 903 #undef __FUNCT__ 904 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 905 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 906 { 907 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 908 PetscErrorCode ierr; 909 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 910 PetscScalar *x,*b,*aa = a->a; 911 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 912 PetscInt adiag_i,i,*vi,nz,ai_i; 913 PetscScalar *v,sum; 914 #endif 915 916 PetscFunctionBegin; 917 if (!n) PetscFunctionReturn(0); 918 919 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 920 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 921 922 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 923 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 924 #else 925 /* forward solve the lower triangular */ 926 x[0] = b[0]; 927 for (i=1; i<n; i++) { 928 ai_i = ai[i]; 929 v = aa + ai_i; 930 vi = aj + ai_i; 931 nz = adiag[i] - ai_i; 932 sum = b[i]; 933 while (nz--) sum -= *v++ * x[*vi++]; 934 x[i] = sum; 935 } 936 937 /* backward solve the upper triangular */ 938 for (i=n-1; i>=0; i--){ 939 adiag_i = adiag[i]; 940 v = aa + adiag_i + 1; 941 vi = aj + adiag_i + 1; 942 nz = ai[i+1] - adiag_i - 1; 943 sum = x[i]; 944 while (nz--) sum -= *v++ * x[*vi++]; 945 x[i] = sum*aa[adiag_i]; 946 } 947 #endif 948 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 949 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 950 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 954 #undef __FUNCT__ 955 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 956 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 957 { 958 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 959 IS iscol = a->col,isrow = a->row; 960 PetscErrorCode ierr; 961 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 962 PetscInt nz,*rout,*cout; 963 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 964 965 PetscFunctionBegin; 966 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 967 968 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 969 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 970 tmp = a->solve_work; 971 972 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 973 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 974 975 /* forward solve the lower triangular */ 976 tmp[0] = b[*r++]; 977 for (i=1; i<n; i++) { 978 v = aa + ai[i] ; 979 vi = aj + ai[i] ; 980 nz = a->diag[i] - ai[i]; 981 sum = b[*r++]; 982 while (nz--) sum -= *v++ * tmp[*vi++ ]; 983 tmp[i] = sum; 984 } 985 986 /* backward solve the upper triangular */ 987 for (i=n-1; i>=0; i--){ 988 v = aa + a->diag[i] + 1; 989 vi = aj + a->diag[i] + 1; 990 nz = ai[i+1] - a->diag[i] - 1; 991 sum = tmp[i]; 992 while (nz--) sum -= *v++ * tmp[*vi++ ]; 993 tmp[i] = sum*aa[a->diag[i]]; 994 x[*c--] += tmp[i]; 995 } 996 997 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 998 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 999 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1000 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1001 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1002 1003 PetscFunctionReturn(0); 1004 } 1005 /* -------------------------------------------------------------------*/ 1006 #undef __FUNCT__ 1007 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1008 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1009 { 1010 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1011 IS iscol = a->col,isrow = a->row; 1012 PetscErrorCode ierr; 1013 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1014 PetscInt nz,*rout,*cout,*diag = a->diag; 1015 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1016 1017 PetscFunctionBegin; 1018 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1019 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1020 tmp = a->solve_work; 1021 1022 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1023 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1024 1025 /* copy the b into temp work space according to permutation */ 1026 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1027 1028 /* forward solve the U^T */ 1029 for (i=0; i<n; i++) { 1030 v = aa + diag[i] ; 1031 vi = aj + diag[i] + 1; 1032 nz = ai[i+1] - diag[i] - 1; 1033 s1 = tmp[i]; 1034 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1035 while (nz--) { 1036 tmp[*vi++ ] -= (*v++)*s1; 1037 } 1038 tmp[i] = s1; 1039 } 1040 1041 /* backward solve the L^T */ 1042 for (i=n-1; i>=0; i--){ 1043 v = aa + diag[i] - 1 ; 1044 vi = aj + diag[i] - 1 ; 1045 nz = diag[i] - ai[i]; 1046 s1 = tmp[i]; 1047 while (nz--) { 1048 tmp[*vi-- ] -= (*v--)*s1; 1049 } 1050 } 1051 1052 /* copy tmp into x according to permutation */ 1053 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1054 1055 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1056 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1057 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1058 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1059 1060 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1061 PetscFunctionReturn(0); 1062 } 1063 1064 #undef __FUNCT__ 1065 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1066 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1067 { 1068 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1069 IS iscol = a->col,isrow = a->row; 1070 PetscErrorCode ierr; 1071 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1072 PetscInt nz,*rout,*cout,*diag = a->diag; 1073 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1074 1075 PetscFunctionBegin; 1076 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1077 1078 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1079 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1080 tmp = a->solve_work; 1081 1082 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1083 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1084 1085 /* copy the b into temp work space according to permutation */ 1086 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1087 1088 /* forward solve the U^T */ 1089 for (i=0; i<n; i++) { 1090 v = aa + diag[i] ; 1091 vi = aj + diag[i] + 1; 1092 nz = ai[i+1] - diag[i] - 1; 1093 tmp[i] *= *v++; 1094 while (nz--) { 1095 tmp[*vi++ ] -= (*v++)*tmp[i]; 1096 } 1097 } 1098 1099 /* backward solve the L^T */ 1100 for (i=n-1; i>=0; i--){ 1101 v = aa + diag[i] - 1 ; 1102 vi = aj + diag[i] - 1 ; 1103 nz = diag[i] - ai[i]; 1104 while (nz--) { 1105 tmp[*vi-- ] -= (*v--)*tmp[i]; 1106 } 1107 } 1108 1109 /* copy tmp into x according to permutation */ 1110 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1111 1112 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1113 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1114 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1115 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1116 1117 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 /* ----------------------------------------------------------------*/ 1121 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1122 1123 #undef __FUNCT__ 1124 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1125 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1126 { 1127 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1128 IS isicol; 1129 PetscErrorCode ierr; 1130 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1131 PetscInt *bi,*cols,nnz,*cols_lvl; 1132 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1133 PetscInt i,levels,diagonal_fill; 1134 PetscTruth col_identity,row_identity; 1135 PetscReal f; 1136 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1137 PetscBT lnkbt; 1138 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1139 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1140 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1141 PetscTruth missing; 1142 1143 PetscFunctionBegin; 1144 f = info->fill; 1145 levels = (PetscInt)info->levels; 1146 diagonal_fill = (PetscInt)info->diagonal_fill; 1147 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1148 1149 /* special case that simply copies fill pattern */ 1150 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1151 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1152 if (!levels && row_identity && col_identity) { 1153 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1154 (*fact)->factor = FACTOR_LU; 1155 (*fact)->info.factor_mallocs = 0; 1156 (*fact)->info.fill_ratio_given = info->fill; 1157 (*fact)->info.fill_ratio_needed = 1.0; 1158 b = (Mat_SeqAIJ*)(*fact)->data; 1159 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1160 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1161 b->row = isrow; 1162 b->col = iscol; 1163 b->icol = isicol; 1164 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1165 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1166 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1167 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1168 ierr = Mat_CheckInode(*fact,PETSC_FALSE);CHKERRQ(ierr); 1169 PetscFunctionReturn(0); 1170 } 1171 1172 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1173 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1174 1175 /* get new row pointers */ 1176 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1177 bi[0] = 0; 1178 /* bdiag is location of diagonal in factor */ 1179 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1180 bdiag[0] = 0; 1181 1182 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1183 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1184 1185 /* create a linked list for storing column indices of the active row */ 1186 nlnk = n + 1; 1187 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1188 1189 /* initial FreeSpace size is f*(ai[n]+1) */ 1190 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1191 current_space = free_space; 1192 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1193 current_space_lvl = free_space_lvl; 1194 1195 for (i=0; i<n; i++) { 1196 nzi = 0; 1197 /* copy current row into linked list */ 1198 nnz = ai[r[i]+1] - ai[r[i]]; 1199 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1200 cols = aj + ai[r[i]]; 1201 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1202 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1203 nzi += nlnk; 1204 1205 /* make sure diagonal entry is included */ 1206 if (diagonal_fill && lnk[i] == -1) { 1207 fm = n; 1208 while (lnk[fm] < i) fm = lnk[fm]; 1209 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1210 lnk[fm] = i; 1211 lnk_lvl[i] = 0; 1212 nzi++; dcount++; 1213 } 1214 1215 /* add pivot rows into the active row */ 1216 nzbd = 0; 1217 prow = lnk[n]; 1218 while (prow < i) { 1219 nnz = bdiag[prow]; 1220 cols = bj_ptr[prow] + nnz + 1; 1221 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1222 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1223 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1224 nzi += nlnk; 1225 prow = lnk[prow]; 1226 nzbd++; 1227 } 1228 bdiag[i] = nzbd; 1229 bi[i+1] = bi[i] + nzi; 1230 1231 /* if free space is not available, make more free space */ 1232 if (current_space->local_remaining<nzi) { 1233 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1234 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1235 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1236 reallocs++; 1237 } 1238 1239 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1240 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1241 bj_ptr[i] = current_space->array; 1242 bjlvl_ptr[i] = current_space_lvl->array; 1243 1244 /* make sure the active row i has diagonal entry */ 1245 if (*(bj_ptr[i]+bdiag[i]) != i) { 1246 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1247 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1248 } 1249 1250 current_space->array += nzi; 1251 current_space->local_used += nzi; 1252 current_space->local_remaining -= nzi; 1253 current_space_lvl->array += nzi; 1254 current_space_lvl->local_used += nzi; 1255 current_space_lvl->local_remaining -= nzi; 1256 } 1257 1258 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1259 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1260 1261 /* destroy list of free space and other temporary arrays */ 1262 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1263 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1264 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1265 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1266 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1267 1268 #if defined(PETSC_USE_INFO) 1269 { 1270 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1271 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1272 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1273 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1274 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1275 if (diagonal_fill) { 1276 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1277 } 1278 } 1279 #endif 1280 1281 /* put together the new matrix */ 1282 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1283 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1284 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1285 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1286 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1287 b = (Mat_SeqAIJ*)(*fact)->data; 1288 b->free_a = PETSC_TRUE; 1289 b->free_ij = PETSC_TRUE; 1290 b->singlemalloc = PETSC_FALSE; 1291 len = (bi[n] )*sizeof(PetscScalar); 1292 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1293 b->j = bj; 1294 b->i = bi; 1295 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1296 b->diag = bdiag; 1297 b->ilen = 0; 1298 b->imax = 0; 1299 b->row = isrow; 1300 b->col = iscol; 1301 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1302 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1303 b->icol = isicol; 1304 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1305 /* In b structure: Free imax, ilen, old a, old j. 1306 Allocate bdiag, solve_work, new a, new j */ 1307 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1308 b->maxnz = b->nz = bi[n] ; 1309 (*fact)->factor = FACTOR_LU; 1310 (*fact)->info.factor_mallocs = reallocs; 1311 (*fact)->info.fill_ratio_given = f; 1312 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1313 1314 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1315 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1316 1317 PetscFunctionReturn(0); 1318 } 1319 1320 #include "src/mat/impls/sbaij/seq/sbaij.h" 1321 #undef __FUNCT__ 1322 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1323 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1324 { 1325 Mat C = *B; 1326 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1327 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1328 IS ip=b->row,iip = b->icol; 1329 PetscErrorCode ierr; 1330 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1331 PetscInt *ai=a->i,*aj=a->j; 1332 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1333 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1334 PetscReal zeropivot,rs,shiftnz; 1335 PetscReal shiftpd; 1336 ChShift_Ctx sctx; 1337 PetscInt newshift; 1338 1339 PetscFunctionBegin; 1340 1341 shiftnz = info->shiftnz; 1342 shiftpd = info->shiftpd; 1343 zeropivot = info->zeropivot; 1344 1345 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1346 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1347 1348 /* initialization */ 1349 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1350 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1351 jl = il + mbs; 1352 rtmp = (MatScalar*)(jl + mbs); 1353 1354 sctx.shift_amount = 0; 1355 sctx.nshift = 0; 1356 do { 1357 sctx.chshift = PETSC_FALSE; 1358 for (i=0; i<mbs; i++) { 1359 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1360 } 1361 1362 for (k = 0; k<mbs; k++){ 1363 bval = ba + bi[k]; 1364 /* initialize k-th row by the perm[k]-th row of A */ 1365 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1366 for (j = jmin; j < jmax; j++){ 1367 col = riip[aj[j]]; 1368 if (col >= k){ /* only take upper triangular entry */ 1369 rtmp[col] = aa[j]; 1370 *bval++ = 0.0; /* for in-place factorization */ 1371 } 1372 } 1373 /* shift the diagonal of the matrix */ 1374 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1375 1376 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1377 dk = rtmp[k]; 1378 i = jl[k]; /* first row to be added to k_th row */ 1379 1380 while (i < k){ 1381 nexti = jl[i]; /* next row to be added to k_th row */ 1382 1383 /* compute multiplier, update diag(k) and U(i,k) */ 1384 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1385 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1386 dk += uikdi*ba[ili]; 1387 ba[ili] = uikdi; /* -U(i,k) */ 1388 1389 /* add multiple of row i to k-th row */ 1390 jmin = ili + 1; jmax = bi[i+1]; 1391 if (jmin < jmax){ 1392 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1393 /* update il and jl for row i */ 1394 il[i] = jmin; 1395 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1396 } 1397 i = nexti; 1398 } 1399 1400 /* shift the diagonals when zero pivot is detected */ 1401 /* compute rs=sum of abs(off-diagonal) */ 1402 rs = 0.0; 1403 jmin = bi[k]+1; 1404 nz = bi[k+1] - jmin; 1405 bcol = bj + jmin; 1406 while (nz--){ 1407 rs += PetscAbsScalar(rtmp[*bcol]); 1408 bcol++; 1409 } 1410 1411 sctx.rs = rs; 1412 sctx.pv = dk; 1413 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1414 1415 if (newshift == 1) { 1416 if (!sctx.shift_amount) { 1417 sctx.shift_amount = 1e-5; 1418 } 1419 break; 1420 } 1421 1422 /* copy data into U(k,:) */ 1423 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1424 jmin = bi[k]+1; jmax = bi[k+1]; 1425 if (jmin < jmax) { 1426 for (j=jmin; j<jmax; j++){ 1427 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1428 } 1429 /* add the k-th row into il and jl */ 1430 il[k] = jmin; 1431 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1432 } 1433 } 1434 } while (sctx.chshift); 1435 ierr = PetscFree(il);CHKERRQ(ierr); 1436 1437 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1438 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1439 C->factor = FACTOR_CHOLESKY; 1440 C->assembled = PETSC_TRUE; 1441 C->preallocated = PETSC_TRUE; 1442 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1443 if (sctx.nshift){ 1444 if (shiftnz) { 1445 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1446 } else if (shiftpd) { 1447 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1448 } 1449 } 1450 PetscFunctionReturn(0); 1451 } 1452 1453 #undef __FUNCT__ 1454 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1455 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1456 { 1457 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1458 Mat_SeqSBAIJ *b; 1459 Mat B; 1460 PetscErrorCode ierr; 1461 PetscTruth perm_identity,missing; 1462 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1463 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1464 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1465 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1466 PetscReal fill=info->fill,levels=info->levels; 1467 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1468 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1469 PetscBT lnkbt; 1470 IS iperm; 1471 1472 PetscFunctionBegin; 1473 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1474 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1475 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1476 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1477 1478 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1479 ui[0] = 0; 1480 1481 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1482 if (!levels && perm_identity) { 1483 1484 for (i=0; i<am; i++) { 1485 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1486 } 1487 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1488 cols = uj; 1489 for (i=0; i<am; i++) { 1490 aj = a->j + a->diag[i]; 1491 ncols = ui[i+1] - ui[i]; 1492 for (j=0; j<ncols; j++) *cols++ = *aj++; 1493 } 1494 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1495 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1496 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1497 1498 /* initialization */ 1499 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1500 1501 /* jl: linked list for storing indices of the pivot rows 1502 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1503 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1504 il = jl + am; 1505 uj_ptr = (PetscInt**)(il + am); 1506 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1507 for (i=0; i<am; i++){ 1508 jl[i] = am; il[i] = 0; 1509 } 1510 1511 /* create and initialize a linked list for storing column indices of the active row k */ 1512 nlnk = am + 1; 1513 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1514 1515 /* initial FreeSpace size is fill*(ai[am]+1) */ 1516 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1517 current_space = free_space; 1518 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1519 current_space_lvl = free_space_lvl; 1520 1521 for (k=0; k<am; k++){ /* for each active row k */ 1522 /* initialize lnk by the column indices of row rip[k] of A */ 1523 nzk = 0; 1524 ncols = ai[rip[k]+1] - ai[rip[k]]; 1525 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1526 ncols_upper = 0; 1527 for (j=0; j<ncols; j++){ 1528 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1529 if (riip[i] >= k){ /* only take upper triangular entry */ 1530 ajtmp[ncols_upper] = i; 1531 ncols_upper++; 1532 } 1533 } 1534 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1535 nzk += nlnk; 1536 1537 /* update lnk by computing fill-in for each pivot row to be merged in */ 1538 prow = jl[k]; /* 1st pivot row */ 1539 1540 while (prow < k){ 1541 nextprow = jl[prow]; 1542 1543 /* merge prow into k-th row */ 1544 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1545 jmax = ui[prow+1]; 1546 ncols = jmax-jmin; 1547 i = jmin - ui[prow]; 1548 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1549 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1550 j = *(uj - 1); 1551 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1552 nzk += nlnk; 1553 1554 /* update il and jl for prow */ 1555 if (jmin < jmax){ 1556 il[prow] = jmin; 1557 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1558 } 1559 prow = nextprow; 1560 } 1561 1562 /* if free space is not available, make more free space */ 1563 if (current_space->local_remaining<nzk) { 1564 i = am - k + 1; /* num of unfactored rows */ 1565 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1566 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1567 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1568 reallocs++; 1569 } 1570 1571 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1572 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1573 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1574 1575 /* add the k-th row into il and jl */ 1576 if (nzk > 1){ 1577 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1578 jl[k] = jl[i]; jl[i] = k; 1579 il[k] = ui[k] + 1; 1580 } 1581 uj_ptr[k] = current_space->array; 1582 uj_lvl_ptr[k] = current_space_lvl->array; 1583 1584 current_space->array += nzk; 1585 current_space->local_used += nzk; 1586 current_space->local_remaining -= nzk; 1587 1588 current_space_lvl->array += nzk; 1589 current_space_lvl->local_used += nzk; 1590 current_space_lvl->local_remaining -= nzk; 1591 1592 ui[k+1] = ui[k] + nzk; 1593 } 1594 1595 #if defined(PETSC_USE_INFO) 1596 if (ai[am] != 0) { 1597 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1598 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1599 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1600 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1601 } else { 1602 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1603 } 1604 #endif 1605 1606 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1608 ierr = PetscFree(jl);CHKERRQ(ierr); 1609 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1610 1611 /* destroy list of free space and other temporary array(s) */ 1612 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1613 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1614 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1615 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1616 1617 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1618 1619 /* put together the new matrix in MATSEQSBAIJ format */ 1620 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1621 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1622 B = *fact; 1623 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1624 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1625 1626 b = (Mat_SeqSBAIJ*)B->data; 1627 b->singlemalloc = PETSC_FALSE; 1628 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1629 b->j = uj; 1630 b->i = ui; 1631 b->diag = 0; 1632 b->ilen = 0; 1633 b->imax = 0; 1634 b->row = perm; 1635 b->col = perm; 1636 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1637 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1638 b->icol = iperm; 1639 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1640 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1641 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1642 b->maxnz = b->nz = ui[am]; 1643 b->free_a = PETSC_TRUE; 1644 b->free_ij = PETSC_TRUE; 1645 1646 B->factor = FACTOR_CHOLESKY; 1647 B->info.factor_mallocs = reallocs; 1648 B->info.fill_ratio_given = fill; 1649 if (ai[am] != 0) { 1650 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1651 } else { 1652 B->info.fill_ratio_needed = 0.0; 1653 } 1654 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1655 if (perm_identity){ 1656 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1657 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1658 } 1659 PetscFunctionReturn(0); 1660 } 1661 1662 #undef __FUNCT__ 1663 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1664 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1665 { 1666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1667 Mat_SeqSBAIJ *b; 1668 Mat B; 1669 PetscErrorCode ierr; 1670 PetscTruth perm_identity; 1671 PetscReal fill = info->fill; 1672 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1673 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1674 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1675 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1676 PetscBT lnkbt; 1677 IS iperm; 1678 1679 PetscFunctionBegin; 1680 /* check whether perm is the identity mapping */ 1681 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1682 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1683 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1684 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1685 1686 /* initialization */ 1687 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1688 ui[0] = 0; 1689 1690 /* jl: linked list for storing indices of the pivot rows 1691 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1692 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1693 il = jl + am; 1694 cols = il + am; 1695 ui_ptr = (PetscInt**)(cols + am); 1696 for (i=0; i<am; i++){ 1697 jl[i] = am; il[i] = 0; 1698 } 1699 1700 /* create and initialize a linked list for storing column indices of the active row k */ 1701 nlnk = am + 1; 1702 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1703 1704 /* initial FreeSpace size is fill*(ai[am]+1) */ 1705 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1706 current_space = free_space; 1707 1708 for (k=0; k<am; k++){ /* for each active row k */ 1709 /* initialize lnk by the column indices of row rip[k] of A */ 1710 nzk = 0; 1711 ncols = ai[rip[k]+1] - ai[rip[k]]; 1712 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1713 ncols_upper = 0; 1714 for (j=0; j<ncols; j++){ 1715 i = riip[*(aj + ai[rip[k]] + j)]; 1716 if (i >= k){ /* only take upper triangular entry */ 1717 cols[ncols_upper] = i; 1718 ncols_upper++; 1719 } 1720 } 1721 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1722 nzk += nlnk; 1723 1724 /* update lnk by computing fill-in for each pivot row to be merged in */ 1725 prow = jl[k]; /* 1st pivot row */ 1726 1727 while (prow < k){ 1728 nextprow = jl[prow]; 1729 /* merge prow into k-th row */ 1730 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1731 jmax = ui[prow+1]; 1732 ncols = jmax-jmin; 1733 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1734 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1735 nzk += nlnk; 1736 1737 /* update il and jl for prow */ 1738 if (jmin < jmax){ 1739 il[prow] = jmin; 1740 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1741 } 1742 prow = nextprow; 1743 } 1744 1745 /* if free space is not available, make more free space */ 1746 if (current_space->local_remaining<nzk) { 1747 i = am - k + 1; /* num of unfactored rows */ 1748 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1749 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1750 reallocs++; 1751 } 1752 1753 /* copy data into free space, then initialize lnk */ 1754 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1755 1756 /* add the k-th row into il and jl */ 1757 if (nzk-1 > 0){ 1758 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1759 jl[k] = jl[i]; jl[i] = k; 1760 il[k] = ui[k] + 1; 1761 } 1762 ui_ptr[k] = current_space->array; 1763 current_space->array += nzk; 1764 current_space->local_used += nzk; 1765 current_space->local_remaining -= nzk; 1766 1767 ui[k+1] = ui[k] + nzk; 1768 } 1769 1770 #if defined(PETSC_USE_INFO) 1771 if (ai[am] != 0) { 1772 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1773 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1774 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1775 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1776 } else { 1777 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1778 } 1779 #endif 1780 1781 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1783 ierr = PetscFree(jl);CHKERRQ(ierr); 1784 1785 /* destroy list of free space and other temporary array(s) */ 1786 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1787 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1788 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1789 1790 /* put together the new matrix in MATSEQSBAIJ format */ 1791 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1792 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1793 B = *fact; 1794 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1795 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1796 1797 b = (Mat_SeqSBAIJ*)B->data; 1798 b->singlemalloc = PETSC_FALSE; 1799 b->free_a = PETSC_TRUE; 1800 b->free_ij = PETSC_TRUE; 1801 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1802 b->j = uj; 1803 b->i = ui; 1804 b->diag = 0; 1805 b->ilen = 0; 1806 b->imax = 0; 1807 b->row = perm; 1808 b->col = perm; 1809 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1810 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1811 b->icol = iperm; 1812 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1813 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1814 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1815 b->maxnz = b->nz = ui[am]; 1816 1817 B->factor = FACTOR_CHOLESKY; 1818 B->info.factor_mallocs = reallocs; 1819 B->info.fill_ratio_given = fill; 1820 if (ai[am] != 0) { 1821 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1822 } else { 1823 B->info.fill_ratio_needed = 0.0; 1824 } 1825 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1826 if (perm_identity){ 1827 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1828 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1829 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1830 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1831 } else { 1832 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1833 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1834 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1835 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1836 } 1837 PetscFunctionReturn(0); 1838 } 1839