1 #define PETSCMAT_DLL 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/inline/dot.h" 5 #include "src/inline/spops.h" 6 #include "petscbt.h" 7 #include "src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 22 #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 23 EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*); 24 EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*); 25 EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*); 26 #endif 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 30 /* ------------------------------------------------------------ 31 32 This interface was contribed by Tony Caola 33 34 This routine is an interface to the pivoting drop-tolerance 35 ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of 36 SPARSEKIT2. 37 38 The SPARSEKIT2 routines used here are covered by the GNU 39 copyright; see the file gnu in this directory. 40 41 Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their 42 help in getting this routine ironed out. 43 44 The major drawback to this routine is that if info->fill is 45 not large enough it fails rather than allocating more space; 46 this can be fixed by hacking/improving the f2c version of 47 Yousef Saad's code. 48 49 ------------------------------------------------------------ 50 */ 51 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 52 { 53 #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE) 54 PetscFunctionBegin; 55 SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\ 56 You can obtain the drop tolerance routines by installing PETSc from\n\ 57 www.mcs.anl.gov/petsc\n"); 58 #else 59 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 60 IS iscolf,isicol,isirow; 61 PetscTruth reorder; 62 PetscErrorCode ierr,sierr; 63 PetscInt *c,*r,*ic,i,n = A->rmap.n; 64 PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j; 65 PetscInt *ordcol,*iwk,*iperm,*jw; 66 PetscInt jmax,lfill,job,*o_i,*o_j; 67 PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a; 68 PetscReal af; 69 70 PetscFunctionBegin; 71 72 if (info->dt == PETSC_DEFAULT) info->dt = .005; 73 if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax); 74 if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01; 75 if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz; 76 lfill = (PetscInt)(info->dtcount/2.0); 77 jmax = (PetscInt)(info->fill*a->nz); 78 79 80 /* ------------------------------------------------------------ 81 If reorder=.TRUE., then the original matrix has to be 82 reordered to reflect the user selected ordering scheme, and 83 then de-reordered so it is in it's original format. 84 Because Saad's dperm() is NOT in place, we have to copy 85 the original matrix and allocate more storage. . . 86 ------------------------------------------------------------ 87 */ 88 89 /* set reorder to true if either isrow or iscol is not identity */ 90 ierr = ISIdentity(isrow,&reorder);CHKERRQ(ierr); 91 if (reorder) {ierr = ISIdentity(iscol,&reorder);CHKERRQ(ierr);} 92 reorder = PetscNot(reorder); 93 94 95 /* storage for ilu factor */ 96 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&new_i);CHKERRQ(ierr); 97 ierr = PetscMalloc(jmax*sizeof(PetscInt),&new_j);CHKERRQ(ierr); 98 ierr = PetscMalloc(jmax*sizeof(PetscScalar),&new_a);CHKERRQ(ierr); 99 ierr = PetscMalloc(n*sizeof(PetscInt),&ordcol);CHKERRQ(ierr); 100 101 /* ------------------------------------------------------------ 102 Make sure that everything is Fortran formatted (1-Based) 103 ------------------------------------------------------------ 104 */ 105 for (i=old_i[0];i<old_i[n];i++) { 106 old_j[i]++; 107 } 108 for(i=0;i<n+1;i++) { 109 old_i[i]++; 110 }; 111 112 113 if (reorder) { 114 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); 115 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 116 for(i=0;i<n;i++) { 117 r[i] = r[i]+1; 118 c[i] = c[i]+1; 119 } 120 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);CHKERRQ(ierr); 121 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);CHKERRQ(ierr); 122 ierr = PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);CHKERRQ(ierr); 123 job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job); 124 for (i=0;i<n;i++) { 125 r[i] = r[i]-1; 126 c[i] = c[i]-1; 127 } 128 ierr = ISRestoreIndices(iscol,&c);CHKERRQ(ierr); 129 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 130 o_a = old_a2; 131 o_j = old_j2; 132 o_i = old_i2; 133 } else { 134 o_a = old_a; 135 o_j = old_j; 136 o_i = old_i; 137 } 138 139 /* ------------------------------------------------------------ 140 Call Saad's ilutp() routine to generate the factorization 141 ------------------------------------------------------------ 142 */ 143 144 ierr = PetscMalloc(2*n*sizeof(PetscInt),&iperm);CHKERRQ(ierr); 145 ierr = PetscMalloc(2*n*sizeof(PetscInt),&jw);CHKERRQ(ierr); 146 ierr = PetscMalloc(n*sizeof(PetscScalar),&w);CHKERRQ(ierr); 147 148 SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr); 149 if (sierr) { 150 switch (sierr) { 151 case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 152 case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax); 153 case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered"); 154 case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong"); 155 case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax); 156 default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr); 157 } 158 } 159 160 ierr = PetscFree(w);CHKERRQ(ierr); 161 ierr = PetscFree(jw);CHKERRQ(ierr); 162 163 /* ------------------------------------------------------------ 164 Saad's routine gives the result in Modified Sparse Row (msr) 165 Convert to Compressed Sparse Row format (csr) 166 ------------------------------------------------------------ 167 */ 168 169 ierr = PetscMalloc(n*sizeof(PetscScalar),&wk);CHKERRQ(ierr); 170 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&iwk);CHKERRQ(ierr); 171 172 SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk); 173 174 ierr = PetscFree(iwk);CHKERRQ(ierr); 175 ierr = PetscFree(wk);CHKERRQ(ierr); 176 177 if (reorder) { 178 ierr = PetscFree(old_a2);CHKERRQ(ierr); 179 ierr = PetscFree(old_j2);CHKERRQ(ierr); 180 ierr = PetscFree(old_i2);CHKERRQ(ierr); 181 } else { 182 /* fix permutation of old_j that the factorization introduced */ 183 for (i=old_i[0]; i<old_i[n]; i++) { 184 old_j[i-1] = iperm[old_j[i-1]-1]; 185 } 186 } 187 188 /* get rid of the shift to indices starting at 1 */ 189 for (i=0; i<n+1; i++) { 190 old_i[i]--; 191 } 192 for (i=old_i[0];i<old_i[n];i++) { 193 old_j[i]--; 194 } 195 196 /* Make the factored matrix 0-based */ 197 for (i=0; i<n+1; i++) { 198 new_i[i]--; 199 } 200 for (i=new_i[0];i<new_i[n];i++) { 201 new_j[i]--; 202 } 203 204 /*-- due to the pivoting, we need to reorder iscol to correctly --*/ 205 /*-- permute the right-hand-side and solution vectors --*/ 206 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 207 ierr = ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);CHKERRQ(ierr); 208 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 209 for(i=0; i<n; i++) { 210 ordcol[i] = ic[iperm[i]-1]; 211 }; 212 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 213 ierr = ISDestroy(isicol);CHKERRQ(ierr); 214 215 ierr = PetscFree(iperm);CHKERRQ(ierr); 216 217 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);CHKERRQ(ierr); 218 ierr = PetscFree(ordcol);CHKERRQ(ierr); 219 220 /*----- put together the new matrix -----*/ 221 222 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 223 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 224 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 225 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 226 (*fact)->factor = FACTOR_LU; 227 (*fact)->assembled = PETSC_TRUE; 228 229 b = (Mat_SeqAIJ*)(*fact)->data; 230 b->free_a = PETSC_TRUE; 231 b->free_ij = PETSC_TRUE; 232 b->singlemalloc = PETSC_FALSE; 233 b->a = new_a; 234 b->j = new_j; 235 b->i = new_i; 236 b->ilen = 0; 237 b->imax = 0; 238 /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */ 239 b->row = isirow; 240 b->col = iscolf; 241 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 242 b->maxnz = b->nz = new_i[n]; 243 ierr = MatMarkDiagonal_SeqAIJ(*fact);CHKERRQ(ierr); 244 (*fact)->info.factor_mallocs = 0; 245 246 af = ((double)b->nz)/((double)a->nz) + .001; 247 ierr = PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);CHKERRQ(ierr); 248 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 249 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 250 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 251 252 ierr = MatILUDTFactor_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 253 254 PetscFunctionReturn(0); 255 #endif 256 } 257 258 #undef __FUNCT__ 259 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 260 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B) 261 { 262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 263 IS isicol; 264 PetscErrorCode ierr; 265 PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j; 266 PetscInt *bi,*bj,*ajtmp; 267 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 268 PetscReal f; 269 PetscInt nlnk,*lnk,k,**bi_ptr; 270 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 271 PetscBT lnkbt; 272 273 PetscFunctionBegin; 274 if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 275 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 276 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 277 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 278 279 /* get new row pointers */ 280 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 281 bi[0] = 0; 282 283 /* bdiag is location of diagonal in factor */ 284 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 285 bdiag[0] = 0; 286 287 /* linked list for storing column indices of the active row */ 288 nlnk = n + 1; 289 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 290 291 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 292 293 /* initial FreeSpace size is f*(ai[n]+1) */ 294 f = info->fill; 295 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 296 current_space = free_space; 297 298 for (i=0; i<n; i++) { 299 /* copy previous fill into linked list */ 300 nzi = 0; 301 nnz = ai[r[i]+1] - ai[r[i]]; 302 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 303 ajtmp = aj + ai[r[i]]; 304 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 305 nzi += nlnk; 306 307 /* add pivot rows into linked list */ 308 row = lnk[n]; 309 while (row < i) { 310 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 311 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 312 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 313 nzi += nlnk; 314 row = lnk[row]; 315 } 316 bi[i+1] = bi[i] + nzi; 317 im[i] = nzi; 318 319 /* mark bdiag */ 320 nzbd = 0; 321 nnz = nzi; 322 k = lnk[n]; 323 while (nnz-- && k < i){ 324 nzbd++; 325 k = lnk[k]; 326 } 327 bdiag[i] = bi[i] + nzbd; 328 329 /* if free space is not available, make more free space */ 330 if (current_space->local_remaining<nzi) { 331 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 332 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 333 reallocs++; 334 } 335 336 /* copy data into free space, then initialize lnk */ 337 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 338 bi_ptr[i] = current_space->array; 339 current_space->array += nzi; 340 current_space->local_used += nzi; 341 current_space->local_remaining -= nzi; 342 } 343 #if defined(PETSC_USE_INFO) 344 if (ai[n] != 0) { 345 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 346 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 347 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 348 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 349 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 350 } else { 351 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 352 } 353 #endif 354 355 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 356 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 357 358 /* destroy list of free space and other temporary array(s) */ 359 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 360 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 361 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 362 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 363 364 /* put together the new matrix */ 365 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 366 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 367 ierr = MatSetType(*B,((PetscObject)A)->type_name);CHKERRQ(ierr); 368 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 369 ierr = PetscLogObjectParent(*B,isicol);CHKERRQ(ierr); 370 b = (Mat_SeqAIJ*)(*B)->data; 371 b->free_a = PETSC_TRUE; 372 b->free_ij = PETSC_TRUE; 373 b->singlemalloc = PETSC_FALSE; 374 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 375 b->j = bj; 376 b->i = bi; 377 b->diag = bdiag; 378 b->ilen = 0; 379 b->imax = 0; 380 b->row = isrow; 381 b->col = iscol; 382 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 383 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 384 b->icol = isicol; 385 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 386 387 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 388 ierr = PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 389 b->maxnz = b->nz = bi[n] ; 390 391 (*B)->factor = FACTOR_LU; 392 (*B)->info.factor_mallocs = reallocs; 393 (*B)->info.fill_ratio_given = f; 394 395 if (ai[n] != 0) { 396 (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 397 } else { 398 (*B)->info.fill_ratio_needed = 0.0; 399 } 400 ierr = MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);CHKERRQ(ierr); 401 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 402 PetscFunctionReturn(0); 403 } 404 405 /* 406 Trouble in factorization, should we dump the original matrix? 407 */ 408 #undef __FUNCT__ 409 #define __FUNCT__ "MatFactorDumpMatrix" 410 PetscErrorCode MatFactorDumpMatrix(Mat A) 411 { 412 PetscErrorCode ierr; 413 PetscTruth flg; 414 415 PetscFunctionBegin; 416 ierr = PetscOptionsHasName(PETSC_NULL,"-mat_factor_dump_on_error",&flg);CHKERRQ(ierr); 417 if (flg) { 418 PetscViewer viewer; 419 char filename[PETSC_MAX_PATH_LEN]; 420 421 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 422 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 423 ierr = MatView(A,viewer);CHKERRQ(ierr); 424 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 425 } 426 PetscFunctionReturn(0); 427 } 428 429 /* ----------------------------------------------------------- */ 430 #undef __FUNCT__ 431 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 432 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 433 { 434 Mat C=*B; 435 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 436 IS isrow = b->row,isicol = b->icol; 437 PetscErrorCode ierr; 438 PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j; 439 PetscInt *ajtmp,*bjtmp,nz,row,*ics; 440 PetscInt *diag_offset = b->diag,diag,*pj; 441 PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps; 442 PetscScalar d; 443 PetscReal rs; 444 LUShift_Ctx sctx; 445 PetscInt newshift,*ddiag; 446 447 PetscFunctionBegin; 448 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 449 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 450 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 451 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 452 rtmps = rtmp; ics = ic; 453 454 sctx.shift_top = 0; 455 sctx.nshift_max = 0; 456 sctx.shift_lo = 0; 457 sctx.shift_hi = 0; 458 459 /* if both shift schemes are chosen by user, only use info->shiftpd */ 460 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 461 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 462 PetscInt *aai = a->i; 463 ddiag = a->diag; 464 sctx.shift_top = 0; 465 for (i=0; i<n; i++) { 466 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 467 d = (a->a)[ddiag[i]]; 468 rs = -PetscAbsScalar(d) - PetscRealPart(d); 469 v = a->a+aai[i]; 470 nz = aai[i+1] - aai[i]; 471 for (j=0; j<nz; j++) 472 rs += PetscAbsScalar(v[j]); 473 if (rs>sctx.shift_top) sctx.shift_top = rs; 474 } 475 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 476 sctx.shift_top *= 1.1; 477 sctx.nshift_max = 5; 478 sctx.shift_lo = 0.; 479 sctx.shift_hi = 1.; 480 } 481 482 sctx.shift_amount = 0; 483 sctx.nshift = 0; 484 do { 485 sctx.lushift = PETSC_FALSE; 486 for (i=0; i<n; i++){ 487 nz = bi[i+1] - bi[i]; 488 bjtmp = bj + bi[i]; 489 for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0; 490 491 /* load in initial (unfactored row) */ 492 nz = a->i[r[i]+1] - a->i[r[i]]; 493 ajtmp = a->j + a->i[r[i]]; 494 v = a->a + a->i[r[i]]; 495 for (j=0; j<nz; j++) { 496 rtmp[ics[ajtmp[j]]] = v[j]; 497 } 498 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 499 500 row = *bjtmp++; 501 while (row < i) { 502 pc = rtmp + row; 503 if (*pc != 0.0) { 504 pv = b->a + diag_offset[row]; 505 pj = b->j + diag_offset[row] + 1; 506 multiplier = *pc / *pv++; 507 *pc = multiplier; 508 nz = bi[row+1] - diag_offset[row] - 1; 509 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 510 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 511 } 512 row = *bjtmp++; 513 } 514 /* finished row so stick it into b->a */ 515 pv = b->a + bi[i] ; 516 pj = b->j + bi[i] ; 517 nz = bi[i+1] - bi[i]; 518 diag = diag_offset[i] - bi[i]; 519 rs = 0.0; 520 for (j=0; j<nz; j++) { 521 pv[j] = rtmps[pj[j]]; 522 if (j != diag) rs += PetscAbsScalar(pv[j]); 523 } 524 525 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 526 sctx.rs = rs; 527 sctx.pv = pv[diag]; 528 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 529 if (newshift == 1) break; 530 } 531 532 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 533 /* 534 * if no shift in this attempt & shifting & started shifting & can refine, 535 * then try lower shift 536 */ 537 sctx.shift_hi = info->shift_fraction; 538 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 539 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 540 sctx.lushift = PETSC_TRUE; 541 sctx.nshift++; 542 } 543 } while (sctx.lushift); 544 545 /* invert diagonal entries for simplier triangular solves */ 546 for (i=0; i<n; i++) { 547 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 548 } 549 550 ierr = PetscFree(rtmp);CHKERRQ(ierr); 551 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 552 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 553 C->factor = FACTOR_LU; 554 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 555 C->assembled = PETSC_TRUE; 556 ierr = PetscLogFlops(C->cmap.n);CHKERRQ(ierr); 557 if (sctx.nshift){ 558 if (info->shiftnz) { 559 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 560 } else if (info->shiftpd) { 561 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 562 } 563 } 564 PetscFunctionReturn(0); 565 } 566 567 /* 568 This routine implements inplace ILU(0) with row or/and column permutations. 569 Input: 570 A - original matrix 571 Output; 572 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 573 a->j (col index) is permuted by the inverse of colperm, then sorted 574 a->a reordered accordingly with a->j 575 a->diag (ptr to diagonal elements) is updated. 576 */ 577 #undef __FUNCT__ 578 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 579 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat A,MatFactorInfo *info,Mat *B) 580 { 581 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 582 IS isrow = a->row,isicol = a->icol; 583 PetscErrorCode ierr; 584 PetscInt *r,*ic,i,j,n=A->rmap.n,*ai=a->i,*aj=a->j; 585 PetscInt *ajtmp,nz,row,*ics; 586 PetscInt *diag = a->diag,nbdiag,*pj; 587 PetscScalar *rtmp,*v,*pc,multiplier,*pv,d; 588 PetscReal rs; 589 LUShift_Ctx sctx; 590 PetscInt newshift; 591 592 PetscFunctionBegin; 593 if (A != *B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 594 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 595 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 597 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 598 ics = ic; 599 600 sctx.shift_top = 0; 601 sctx.nshift_max = 0; 602 sctx.shift_lo = 0; 603 sctx.shift_hi = 0; 604 605 /* if both shift schemes are chosen by user, only use info->shiftpd */ 606 if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0; 607 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 608 sctx.shift_top = 0; 609 for (i=0; i<n; i++) { 610 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 611 d = (a->a)[diag[i]]; 612 rs = -PetscAbsScalar(d) - PetscRealPart(d); 613 v = a->a+ai[i]; 614 nz = ai[i+1] - ai[i]; 615 for (j=0; j<nz; j++) 616 rs += PetscAbsScalar(v[j]); 617 if (rs>sctx.shift_top) sctx.shift_top = rs; 618 } 619 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 620 sctx.shift_top *= 1.1; 621 sctx.nshift_max = 5; 622 sctx.shift_lo = 0.; 623 sctx.shift_hi = 1.; 624 } 625 626 sctx.shift_amount = 0; 627 sctx.nshift = 0; 628 do { 629 sctx.lushift = PETSC_FALSE; 630 for (i=0; i<n; i++){ 631 /* load in initial unfactored row */ 632 nz = ai[r[i]+1] - ai[r[i]]; 633 ajtmp = aj + ai[r[i]]; 634 v = a->a + ai[r[i]]; 635 /* sort permuted ajtmp and values v accordingly */ 636 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 637 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 638 639 diag[r[i]] = ai[r[i]]; 640 for (j=0; j<nz; j++) { 641 rtmp[ajtmp[j]] = v[j]; 642 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 643 } 644 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 645 646 row = *ajtmp++; 647 while (row < i) { 648 pc = rtmp + row; 649 if (*pc != 0.0) { 650 pv = a->a + diag[r[row]]; 651 pj = aj + diag[r[row]] + 1; 652 653 multiplier = *pc / *pv++; 654 *pc = multiplier; 655 nz = ai[r[row]+1] - diag[r[row]] - 1; 656 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 657 ierr = PetscLogFlops(2*nz);CHKERRQ(ierr); 658 } 659 row = *ajtmp++; 660 } 661 /* finished row so overwrite it onto a->a */ 662 pv = a->a + ai[r[i]] ; 663 pj = aj + ai[r[i]] ; 664 nz = ai[r[i]+1] - ai[r[i]]; 665 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 666 667 rs = 0.0; 668 for (j=0; j<nz; j++) { 669 pv[j] = rtmp[pj[j]]; 670 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 671 } 672 673 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 674 sctx.rs = rs; 675 sctx.pv = pv[nbdiag]; 676 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 677 if (newshift == 1) break; 678 } 679 680 if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 681 /* 682 * if no shift in this attempt & shifting & started shifting & can refine, 683 * then try lower shift 684 */ 685 sctx.shift_hi = info->shift_fraction; 686 info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 687 sctx.shift_amount = info->shift_fraction * sctx.shift_top; 688 sctx.lushift = PETSC_TRUE; 689 sctx.nshift++; 690 } 691 } while (sctx.lushift); 692 693 /* invert diagonal entries for simplier triangular solves */ 694 for (i=0; i<n; i++) { 695 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 696 } 697 698 ierr = PetscFree(rtmp);CHKERRQ(ierr); 699 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 700 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 701 A->factor = FACTOR_LU; 702 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 703 A->assembled = PETSC_TRUE; 704 ierr = PetscLogFlops(A->cmap.n);CHKERRQ(ierr); 705 if (sctx.nshift){ 706 if (info->shiftnz) { 707 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 708 } else if (info->shiftpd) { 709 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);CHKERRQ(ierr); 710 } 711 } 712 PetscFunctionReturn(0); 713 } 714 715 #undef __FUNCT__ 716 #define __FUNCT__ "MatUsePETSc_SeqAIJ" 717 PetscErrorCode MatUsePETSc_SeqAIJ(Mat A) 718 { 719 PetscFunctionBegin; 720 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 721 A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 722 PetscFunctionReturn(0); 723 } 724 725 726 /* ----------------------------------------------------------- */ 727 #undef __FUNCT__ 728 #define __FUNCT__ "MatLUFactor_SeqAIJ" 729 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info) 730 { 731 PetscErrorCode ierr; 732 Mat C; 733 734 PetscFunctionBegin; 735 ierr = MatLUFactorSymbolic(A,row,col,info,&C);CHKERRQ(ierr); 736 ierr = MatLUFactorNumeric(A,info,&C);CHKERRQ(ierr); 737 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 738 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 739 PetscFunctionReturn(0); 740 } 741 /* ----------------------------------------------------------- */ 742 #undef __FUNCT__ 743 #define __FUNCT__ "MatSolve_SeqAIJ" 744 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 745 { 746 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 747 IS iscol = a->col,isrow = a->row; 748 PetscErrorCode ierr; 749 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 750 PetscInt nz,*rout,*cout; 751 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 752 753 PetscFunctionBegin; 754 if (!n) PetscFunctionReturn(0); 755 756 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 757 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 758 tmp = a->solve_work; 759 760 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 761 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 762 763 /* forward solve the lower triangular */ 764 tmp[0] = b[*r++]; 765 tmps = tmp; 766 for (i=1; i<n; i++) { 767 v = aa + ai[i] ; 768 vi = aj + ai[i] ; 769 nz = a->diag[i] - ai[i]; 770 sum = b[*r++]; 771 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 772 tmp[i] = sum; 773 } 774 775 /* backward solve the upper triangular */ 776 for (i=n-1; i>=0; i--){ 777 v = aa + a->diag[i] + 1; 778 vi = aj + a->diag[i] + 1; 779 nz = ai[i+1] - a->diag[i] - 1; 780 sum = tmp[i]; 781 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 782 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 783 } 784 785 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 786 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 787 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 788 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 789 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 790 PetscFunctionReturn(0); 791 } 792 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatMatSolve_SeqAIJ" 795 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 796 { 797 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 798 IS iscol = a->col,isrow = a->row; 799 PetscErrorCode ierr; 800 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 801 PetscInt nz,*rout,*cout,neq; 802 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 803 804 PetscFunctionBegin; 805 if (!n) PetscFunctionReturn(0); 806 807 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 808 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 809 810 tmp = a->solve_work; 811 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 812 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 813 814 for (neq=0; neq<n; neq++){ 815 /* forward solve the lower triangular */ 816 tmp[0] = b[r[0]]; 817 tmps = tmp; 818 for (i=1; i<n; i++) { 819 v = aa + ai[i] ; 820 vi = aj + ai[i] ; 821 nz = a->diag[i] - ai[i]; 822 sum = b[r[i]]; 823 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 824 tmp[i] = sum; 825 } 826 /* backward solve the upper triangular */ 827 for (i=n-1; i>=0; i--){ 828 v = aa + a->diag[i] + 1; 829 vi = aj + a->diag[i] + 1; 830 nz = ai[i+1] - a->diag[i] - 1; 831 sum = tmp[i]; 832 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 833 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 834 } 835 836 b += n; 837 x += n; 838 } 839 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 840 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 841 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 842 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 843 ierr = PetscLogFlops(n*(2*a->nz - n));CHKERRQ(ierr); 844 PetscFunctionReturn(0); 845 } 846 847 #undef __FUNCT__ 848 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 849 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 850 { 851 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 852 IS iscol = a->col,isrow = a->row; 853 PetscErrorCode ierr; 854 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 855 PetscInt nz,*rout,*cout,row; 856 PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v; 857 858 PetscFunctionBegin; 859 if (!n) PetscFunctionReturn(0); 860 861 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 862 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 863 tmp = a->solve_work; 864 865 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 866 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 867 868 /* forward solve the lower triangular */ 869 tmp[0] = b[*r++]; 870 tmps = tmp; 871 for (row=1; row<n; row++) { 872 i = rout[row]; /* permuted row */ 873 v = aa + ai[i] ; 874 vi = aj + ai[i] ; 875 nz = a->diag[i] - ai[i]; 876 sum = b[*r++]; 877 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 878 tmp[row] = sum; 879 } 880 881 /* backward solve the upper triangular */ 882 for (row=n-1; row>=0; row--){ 883 i = rout[row]; /* permuted row */ 884 v = aa + a->diag[i] + 1; 885 vi = aj + a->diag[i] + 1; 886 nz = ai[i+1] - a->diag[i] - 1; 887 sum = tmp[row]; 888 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 889 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 890 } 891 892 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 893 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 894 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 896 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 897 PetscFunctionReturn(0); 898 } 899 900 /* ----------------------------------------------------------- */ 901 #undef __FUNCT__ 902 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 903 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 904 { 905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 906 PetscErrorCode ierr; 907 PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag; 908 PetscScalar *x,*b,*aa = a->a; 909 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 910 PetscInt adiag_i,i,*vi,nz,ai_i; 911 PetscScalar *v,sum; 912 #endif 913 914 PetscFunctionBegin; 915 if (!n) PetscFunctionReturn(0); 916 917 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 918 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 919 920 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 921 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 922 #else 923 /* forward solve the lower triangular */ 924 x[0] = b[0]; 925 for (i=1; i<n; i++) { 926 ai_i = ai[i]; 927 v = aa + ai_i; 928 vi = aj + ai_i; 929 nz = adiag[i] - ai_i; 930 sum = b[i]; 931 while (nz--) sum -= *v++ * x[*vi++]; 932 x[i] = sum; 933 } 934 935 /* backward solve the upper triangular */ 936 for (i=n-1; i>=0; i--){ 937 adiag_i = adiag[i]; 938 v = aa + adiag_i + 1; 939 vi = aj + adiag_i + 1; 940 nz = ai[i+1] - adiag_i - 1; 941 sum = x[i]; 942 while (nz--) sum -= *v++ * x[*vi++]; 943 x[i] = sum*aa[adiag_i]; 944 } 945 #endif 946 ierr = PetscLogFlops(2*a->nz - A->cmap.n);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 PetscFunctionReturn(0); 950 } 951 952 #undef __FUNCT__ 953 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 954 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 955 { 956 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 957 IS iscol = a->col,isrow = a->row; 958 PetscErrorCode ierr; 959 PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 960 PetscInt nz,*rout,*cout; 961 PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v; 962 963 PetscFunctionBegin; 964 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 965 966 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 967 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 968 tmp = a->solve_work; 969 970 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 971 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 972 973 /* forward solve the lower triangular */ 974 tmp[0] = b[*r++]; 975 for (i=1; i<n; i++) { 976 v = aa + ai[i] ; 977 vi = aj + ai[i] ; 978 nz = a->diag[i] - ai[i]; 979 sum = b[*r++]; 980 while (nz--) sum -= *v++ * tmp[*vi++ ]; 981 tmp[i] = sum; 982 } 983 984 /* backward solve the upper triangular */ 985 for (i=n-1; i>=0; i--){ 986 v = aa + a->diag[i] + 1; 987 vi = aj + a->diag[i] + 1; 988 nz = ai[i+1] - a->diag[i] - 1; 989 sum = tmp[i]; 990 while (nz--) sum -= *v++ * tmp[*vi++ ]; 991 tmp[i] = sum*aa[a->diag[i]]; 992 x[*c--] += tmp[i]; 993 } 994 995 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 996 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 997 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 998 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 999 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1000 1001 PetscFunctionReturn(0); 1002 } 1003 /* -------------------------------------------------------------------*/ 1004 #undef __FUNCT__ 1005 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1006 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1007 { 1008 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1009 IS iscol = a->col,isrow = a->row; 1010 PetscErrorCode ierr; 1011 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1012 PetscInt nz,*rout,*cout,*diag = a->diag; 1013 PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1; 1014 1015 PetscFunctionBegin; 1016 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1017 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1018 tmp = a->solve_work; 1019 1020 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1021 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1022 1023 /* copy the b into temp work space according to permutation */ 1024 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1025 1026 /* forward solve the U^T */ 1027 for (i=0; i<n; i++) { 1028 v = aa + diag[i] ; 1029 vi = aj + diag[i] + 1; 1030 nz = ai[i+1] - diag[i] - 1; 1031 s1 = tmp[i]; 1032 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1033 while (nz--) { 1034 tmp[*vi++ ] -= (*v++)*s1; 1035 } 1036 tmp[i] = s1; 1037 } 1038 1039 /* backward solve the L^T */ 1040 for (i=n-1; i>=0; i--){ 1041 v = aa + diag[i] - 1 ; 1042 vi = aj + diag[i] - 1 ; 1043 nz = diag[i] - ai[i]; 1044 s1 = tmp[i]; 1045 while (nz--) { 1046 tmp[*vi-- ] -= (*v--)*s1; 1047 } 1048 } 1049 1050 /* copy tmp into x according to permutation */ 1051 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1052 1053 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1054 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1055 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1056 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1057 1058 ierr = PetscLogFlops(2*a->nz-A->cmap.n);CHKERRQ(ierr); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 #undef __FUNCT__ 1063 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1064 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1065 { 1066 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1067 IS iscol = a->col,isrow = a->row; 1068 PetscErrorCode ierr; 1069 PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j; 1070 PetscInt nz,*rout,*cout,*diag = a->diag; 1071 PetscScalar *x,*b,*tmp,*aa = a->a,*v; 1072 1073 PetscFunctionBegin; 1074 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1075 1076 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1077 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1078 tmp = a->solve_work; 1079 1080 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1081 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1082 1083 /* copy the b into temp work space according to permutation */ 1084 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1085 1086 /* forward solve the U^T */ 1087 for (i=0; i<n; i++) { 1088 v = aa + diag[i] ; 1089 vi = aj + diag[i] + 1; 1090 nz = ai[i+1] - diag[i] - 1; 1091 tmp[i] *= *v++; 1092 while (nz--) { 1093 tmp[*vi++ ] -= (*v++)*tmp[i]; 1094 } 1095 } 1096 1097 /* backward solve the L^T */ 1098 for (i=n-1; i>=0; i--){ 1099 v = aa + diag[i] - 1 ; 1100 vi = aj + diag[i] - 1 ; 1101 nz = diag[i] - ai[i]; 1102 while (nz--) { 1103 tmp[*vi-- ] -= (*v--)*tmp[i]; 1104 } 1105 } 1106 1107 /* copy tmp into x according to permutation */ 1108 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1109 1110 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1111 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1112 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1113 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1114 1115 ierr = PetscLogFlops(2*a->nz);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 /* ----------------------------------------------------------------*/ 1119 1120 #undef __FUNCT__ 1121 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1122 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact) 1123 { 1124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1125 IS isicol; 1126 PetscErrorCode ierr; 1127 PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j,d; 1128 PetscInt *bi,*cols,nnz,*cols_lvl; 1129 PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0; 1130 PetscInt i,levels,diagonal_fill; 1131 PetscTruth col_identity,row_identity; 1132 PetscReal f; 1133 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1134 PetscBT lnkbt; 1135 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1136 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1137 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1138 PetscTruth missing; 1139 1140 PetscFunctionBegin; 1141 f = info->fill; 1142 levels = (PetscInt)info->levels; 1143 diagonal_fill = (PetscInt)info->diagonal_fill; 1144 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1145 1146 /* special case that simply copies fill pattern */ 1147 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1148 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1149 if (!levels && row_identity && col_identity) { 1150 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 1151 (*fact)->factor = FACTOR_LU; 1152 (*fact)->info.factor_mallocs = 0; 1153 (*fact)->info.fill_ratio_given = info->fill; 1154 (*fact)->info.fill_ratio_needed = 1.0; 1155 b = (Mat_SeqAIJ*)(*fact)->data; 1156 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1157 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1158 b->row = isrow; 1159 b->col = iscol; 1160 b->icol = isicol; 1161 ierr = PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1162 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 1163 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1164 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1169 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1170 1171 /* get new row pointers */ 1172 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1173 bi[0] = 0; 1174 /* bdiag is location of diagonal in factor */ 1175 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1176 bdiag[0] = 0; 1177 1178 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1179 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1180 1181 /* create a linked list for storing column indices of the active row */ 1182 nlnk = n + 1; 1183 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1184 1185 /* initial FreeSpace size is f*(ai[n]+1) */ 1186 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1187 current_space = free_space; 1188 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1189 current_space_lvl = free_space_lvl; 1190 1191 for (i=0; i<n; i++) { 1192 nzi = 0; 1193 /* copy current row into linked list */ 1194 nnz = ai[r[i]+1] - ai[r[i]]; 1195 if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix"); 1196 cols = aj + ai[r[i]]; 1197 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1198 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1199 nzi += nlnk; 1200 1201 /* make sure diagonal entry is included */ 1202 if (diagonal_fill && lnk[i] == -1) { 1203 fm = n; 1204 while (lnk[fm] < i) fm = lnk[fm]; 1205 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1206 lnk[fm] = i; 1207 lnk_lvl[i] = 0; 1208 nzi++; dcount++; 1209 } 1210 1211 /* add pivot rows into the active row */ 1212 nzbd = 0; 1213 prow = lnk[n]; 1214 while (prow < i) { 1215 nnz = bdiag[prow]; 1216 cols = bj_ptr[prow] + nnz + 1; 1217 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1218 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1219 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1220 nzi += nlnk; 1221 prow = lnk[prow]; 1222 nzbd++; 1223 } 1224 bdiag[i] = nzbd; 1225 bi[i+1] = bi[i] + nzi; 1226 1227 /* if free space is not available, make more free space */ 1228 if (current_space->local_remaining<nzi) { 1229 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1230 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1231 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1232 reallocs++; 1233 } 1234 1235 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1236 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1237 bj_ptr[i] = current_space->array; 1238 bjlvl_ptr[i] = current_space_lvl->array; 1239 1240 /* make sure the active row i has diagonal entry */ 1241 if (*(bj_ptr[i]+bdiag[i]) != i) { 1242 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1243 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1244 } 1245 1246 current_space->array += nzi; 1247 current_space->local_used += nzi; 1248 current_space->local_remaining -= nzi; 1249 current_space_lvl->array += nzi; 1250 current_space_lvl->local_used += nzi; 1251 current_space_lvl->local_remaining -= nzi; 1252 } 1253 1254 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1255 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1256 1257 /* destroy list of free space and other temporary arrays */ 1258 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1259 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1260 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1261 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1262 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1263 1264 #if defined(PETSC_USE_INFO) 1265 { 1266 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1267 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1268 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1269 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1270 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1271 if (diagonal_fill) { 1272 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1273 } 1274 } 1275 #endif 1276 1277 /* put together the new matrix */ 1278 ierr = MatCreate(((PetscObject)A)->comm,fact);CHKERRQ(ierr); 1279 ierr = MatSetSizes(*fact,n,n,n,n);CHKERRQ(ierr); 1280 ierr = MatSetType(*fact,((PetscObject)A)->type_name);CHKERRQ(ierr); 1281 ierr = MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1282 ierr = PetscLogObjectParent(*fact,isicol);CHKERRQ(ierr); 1283 b = (Mat_SeqAIJ*)(*fact)->data; 1284 b->free_a = PETSC_TRUE; 1285 b->free_ij = PETSC_TRUE; 1286 b->singlemalloc = PETSC_FALSE; 1287 len = (bi[n] )*sizeof(PetscScalar); 1288 ierr = PetscMalloc(len+1,&b->a);CHKERRQ(ierr); 1289 b->j = bj; 1290 b->i = bi; 1291 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1292 b->diag = bdiag; 1293 b->ilen = 0; 1294 b->imax = 0; 1295 b->row = isrow; 1296 b->col = iscol; 1297 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1298 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1299 b->icol = isicol; 1300 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1301 /* In b structure: Free imax, ilen, old a, old j. 1302 Allocate bdiag, solve_work, new a, new j */ 1303 ierr = PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1304 b->maxnz = b->nz = bi[n] ; 1305 (*fact)->factor = FACTOR_LU; 1306 (*fact)->info.factor_mallocs = reallocs; 1307 (*fact)->info.fill_ratio_given = f; 1308 (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1309 1310 ierr = MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);CHKERRQ(ierr); 1311 (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */ 1312 1313 PetscFunctionReturn(0); 1314 } 1315 1316 #include "src/mat/impls/sbaij/seq/sbaij.h" 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1319 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B) 1320 { 1321 Mat C = *B; 1322 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1323 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1324 IS ip=b->row,iip = b->icol; 1325 PetscErrorCode ierr; 1326 PetscInt *rip,*riip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol; 1327 PetscInt *ai=a->i,*aj=a->j; 1328 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1329 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1330 PetscReal zeropivot,rs,shiftnz; 1331 PetscReal shiftpd; 1332 ChShift_Ctx sctx; 1333 PetscInt newshift; 1334 1335 PetscFunctionBegin; 1336 1337 shiftnz = info->shiftnz; 1338 shiftpd = info->shiftpd; 1339 zeropivot = info->zeropivot; 1340 1341 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1342 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1343 1344 /* initialization */ 1345 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1346 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1347 jl = il + mbs; 1348 rtmp = (MatScalar*)(jl + mbs); 1349 1350 sctx.shift_amount = 0; 1351 sctx.nshift = 0; 1352 do { 1353 sctx.chshift = PETSC_FALSE; 1354 for (i=0; i<mbs; i++) { 1355 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1356 } 1357 1358 for (k = 0; k<mbs; k++){ 1359 bval = ba + bi[k]; 1360 /* initialize k-th row by the perm[k]-th row of A */ 1361 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1362 for (j = jmin; j < jmax; j++){ 1363 col = riip[aj[j]]; 1364 if (col >= k){ /* only take upper triangular entry */ 1365 rtmp[col] = aa[j]; 1366 *bval++ = 0.0; /* for in-place factorization */ 1367 } 1368 } 1369 /* shift the diagonal of the matrix */ 1370 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1371 1372 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1373 dk = rtmp[k]; 1374 i = jl[k]; /* first row to be added to k_th row */ 1375 1376 while (i < k){ 1377 nexti = jl[i]; /* next row to be added to k_th row */ 1378 1379 /* compute multiplier, update diag(k) and U(i,k) */ 1380 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1381 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1382 dk += uikdi*ba[ili]; 1383 ba[ili] = uikdi; /* -U(i,k) */ 1384 1385 /* add multiple of row i to k-th row */ 1386 jmin = ili + 1; jmax = bi[i+1]; 1387 if (jmin < jmax){ 1388 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1389 /* update il and jl for row i */ 1390 il[i] = jmin; 1391 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1392 } 1393 i = nexti; 1394 } 1395 1396 /* shift the diagonals when zero pivot is detected */ 1397 /* compute rs=sum of abs(off-diagonal) */ 1398 rs = 0.0; 1399 jmin = bi[k]+1; 1400 nz = bi[k+1] - jmin; 1401 bcol = bj + jmin; 1402 while (nz--){ 1403 rs += PetscAbsScalar(rtmp[*bcol]); 1404 bcol++; 1405 } 1406 1407 sctx.rs = rs; 1408 sctx.pv = dk; 1409 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1410 1411 if (newshift == 1) { 1412 if (!sctx.shift_amount) { 1413 sctx.shift_amount = 1e-5; 1414 } 1415 break; 1416 } 1417 1418 /* copy data into U(k,:) */ 1419 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1420 jmin = bi[k]+1; jmax = bi[k+1]; 1421 if (jmin < jmax) { 1422 for (j=jmin; j<jmax; j++){ 1423 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1424 } 1425 /* add the k-th row into il and jl */ 1426 il[k] = jmin; 1427 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1428 } 1429 } 1430 } while (sctx.chshift); 1431 ierr = PetscFree(il);CHKERRQ(ierr); 1432 1433 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1434 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1435 C->factor = FACTOR_CHOLESKY; 1436 C->assembled = PETSC_TRUE; 1437 C->preallocated = PETSC_TRUE; 1438 ierr = PetscLogFlops(C->rmap.n);CHKERRQ(ierr); 1439 if (sctx.nshift){ 1440 if (shiftnz) { 1441 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1442 } else if (shiftpd) { 1443 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1444 } 1445 } 1446 PetscFunctionReturn(0); 1447 } 1448 1449 #undef __FUNCT__ 1450 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1451 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1452 { 1453 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1454 Mat_SeqSBAIJ *b; 1455 Mat B; 1456 PetscErrorCode ierr; 1457 PetscTruth perm_identity,missing; 1458 PetscInt reallocs=0,*rip,*riip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui; 1459 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1460 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1461 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1462 PetscReal fill=info->fill,levels=info->levels; 1463 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1464 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1465 PetscBT lnkbt; 1466 IS iperm; 1467 1468 PetscFunctionBegin; 1469 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1470 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1471 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1472 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1473 1474 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1475 ui[0] = 0; 1476 1477 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1478 if (!levels && perm_identity) { 1479 1480 for (i=0; i<am; i++) { 1481 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1482 } 1483 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1484 cols = uj; 1485 for (i=0; i<am; i++) { 1486 aj = a->j + a->diag[i]; 1487 ncols = ui[i+1] - ui[i]; 1488 for (j=0; j<ncols; j++) *cols++ = *aj++; 1489 } 1490 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1491 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1492 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1493 1494 /* initialization */ 1495 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1496 1497 /* jl: linked list for storing indices of the pivot rows 1498 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1499 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1500 il = jl + am; 1501 uj_ptr = (PetscInt**)(il + am); 1502 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1503 for (i=0; i<am; i++){ 1504 jl[i] = am; il[i] = 0; 1505 } 1506 1507 /* create and initialize a linked list for storing column indices of the active row k */ 1508 nlnk = am + 1; 1509 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1510 1511 /* initial FreeSpace size is fill*(ai[am]+1) */ 1512 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1513 current_space = free_space; 1514 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1515 current_space_lvl = free_space_lvl; 1516 1517 for (k=0; k<am; k++){ /* for each active row k */ 1518 /* initialize lnk by the column indices of row rip[k] of A */ 1519 nzk = 0; 1520 ncols = ai[rip[k]+1] - ai[rip[k]]; 1521 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1522 ncols_upper = 0; 1523 for (j=0; j<ncols; j++){ 1524 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1525 if (riip[i] >= k){ /* only take upper triangular entry */ 1526 ajtmp[ncols_upper] = i; 1527 ncols_upper++; 1528 } 1529 } 1530 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1531 nzk += nlnk; 1532 1533 /* update lnk by computing fill-in for each pivot row to be merged in */ 1534 prow = jl[k]; /* 1st pivot row */ 1535 1536 while (prow < k){ 1537 nextprow = jl[prow]; 1538 1539 /* merge prow into k-th row */ 1540 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1541 jmax = ui[prow+1]; 1542 ncols = jmax-jmin; 1543 i = jmin - ui[prow]; 1544 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1545 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1546 j = *(uj - 1); 1547 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1548 nzk += nlnk; 1549 1550 /* update il and jl for prow */ 1551 if (jmin < jmax){ 1552 il[prow] = jmin; 1553 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1554 } 1555 prow = nextprow; 1556 } 1557 1558 /* if free space is not available, make more free space */ 1559 if (current_space->local_remaining<nzk) { 1560 i = am - k + 1; /* num of unfactored rows */ 1561 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1562 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1563 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1564 reallocs++; 1565 } 1566 1567 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1568 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1569 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1570 1571 /* add the k-th row into il and jl */ 1572 if (nzk > 1){ 1573 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1574 jl[k] = jl[i]; jl[i] = k; 1575 il[k] = ui[k] + 1; 1576 } 1577 uj_ptr[k] = current_space->array; 1578 uj_lvl_ptr[k] = current_space_lvl->array; 1579 1580 current_space->array += nzk; 1581 current_space->local_used += nzk; 1582 current_space->local_remaining -= nzk; 1583 1584 current_space_lvl->array += nzk; 1585 current_space_lvl->local_used += nzk; 1586 current_space_lvl->local_remaining -= nzk; 1587 1588 ui[k+1] = ui[k] + nzk; 1589 } 1590 1591 #if defined(PETSC_USE_INFO) 1592 if (ai[am] != 0) { 1593 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1594 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1595 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1596 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1597 } else { 1598 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1599 } 1600 #endif 1601 1602 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1603 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1604 ierr = PetscFree(jl);CHKERRQ(ierr); 1605 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1606 1607 /* destroy list of free space and other temporary array(s) */ 1608 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1609 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1610 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1611 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1612 1613 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1614 1615 /* put together the new matrix in MATSEQSBAIJ format */ 1616 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1617 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1618 B = *fact; 1619 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1620 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1621 1622 b = (Mat_SeqSBAIJ*)B->data; 1623 b->singlemalloc = PETSC_FALSE; 1624 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1625 b->j = uj; 1626 b->i = ui; 1627 b->diag = 0; 1628 b->ilen = 0; 1629 b->imax = 0; 1630 b->row = perm; 1631 b->col = perm; 1632 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1633 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1634 b->icol = iperm; 1635 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1636 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1637 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1638 b->maxnz = b->nz = ui[am]; 1639 b->free_a = PETSC_TRUE; 1640 b->free_ij = PETSC_TRUE; 1641 1642 B->factor = FACTOR_CHOLESKY; 1643 B->info.factor_mallocs = reallocs; 1644 B->info.fill_ratio_given = fill; 1645 if (ai[am] != 0) { 1646 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1647 } else { 1648 B->info.fill_ratio_needed = 0.0; 1649 } 1650 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1651 if (perm_identity){ 1652 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1653 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1654 } 1655 PetscFunctionReturn(0); 1656 } 1657 1658 #undef __FUNCT__ 1659 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1660 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact) 1661 { 1662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1663 Mat_SeqSBAIJ *b; 1664 Mat B; 1665 PetscErrorCode ierr; 1666 PetscTruth perm_identity; 1667 PetscReal fill = info->fill; 1668 PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1669 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1670 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1671 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1672 PetscBT lnkbt; 1673 IS iperm; 1674 1675 PetscFunctionBegin; 1676 /* check whether perm is the identity mapping */ 1677 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1678 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1679 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1680 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1681 1682 /* initialization */ 1683 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1684 ui[0] = 0; 1685 1686 /* jl: linked list for storing indices of the pivot rows 1687 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1688 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1689 il = jl + am; 1690 cols = il + am; 1691 ui_ptr = (PetscInt**)(cols + am); 1692 for (i=0; i<am; i++){ 1693 jl[i] = am; il[i] = 0; 1694 } 1695 1696 /* create and initialize a linked list for storing column indices of the active row k */ 1697 nlnk = am + 1; 1698 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1699 1700 /* initial FreeSpace size is fill*(ai[am]+1) */ 1701 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1702 current_space = free_space; 1703 1704 for (k=0; k<am; k++){ /* for each active row k */ 1705 /* initialize lnk by the column indices of row rip[k] of A */ 1706 nzk = 0; 1707 ncols = ai[rip[k]+1] - ai[rip[k]]; 1708 if (!ncols) SETERRQ(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix"); 1709 ncols_upper = 0; 1710 for (j=0; j<ncols; j++){ 1711 i = riip[*(aj + ai[rip[k]] + j)]; 1712 if (i >= k){ /* only take upper triangular entry */ 1713 cols[ncols_upper] = i; 1714 ncols_upper++; 1715 } 1716 } 1717 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1718 nzk += nlnk; 1719 1720 /* update lnk by computing fill-in for each pivot row to be merged in */ 1721 prow = jl[k]; /* 1st pivot row */ 1722 1723 while (prow < k){ 1724 nextprow = jl[prow]; 1725 /* merge prow into k-th row */ 1726 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1727 jmax = ui[prow+1]; 1728 ncols = jmax-jmin; 1729 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1730 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1731 nzk += nlnk; 1732 1733 /* update il and jl for prow */ 1734 if (jmin < jmax){ 1735 il[prow] = jmin; 1736 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1737 } 1738 prow = nextprow; 1739 } 1740 1741 /* if free space is not available, make more free space */ 1742 if (current_space->local_remaining<nzk) { 1743 i = am - k + 1; /* num of unfactored rows */ 1744 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1745 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1746 reallocs++; 1747 } 1748 1749 /* copy data into free space, then initialize lnk */ 1750 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1751 1752 /* add the k-th row into il and jl */ 1753 if (nzk-1 > 0){ 1754 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1755 jl[k] = jl[i]; jl[i] = k; 1756 il[k] = ui[k] + 1; 1757 } 1758 ui_ptr[k] = current_space->array; 1759 current_space->array += nzk; 1760 current_space->local_used += nzk; 1761 current_space->local_remaining -= nzk; 1762 1763 ui[k+1] = ui[k] + nzk; 1764 } 1765 1766 #if defined(PETSC_USE_INFO) 1767 if (ai[am] != 0) { 1768 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1769 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1770 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1771 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1772 } else { 1773 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1774 } 1775 #endif 1776 1777 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1778 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1779 ierr = PetscFree(jl);CHKERRQ(ierr); 1780 1781 /* destroy list of free space and other temporary array(s) */ 1782 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1783 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1784 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1785 1786 /* put together the new matrix in MATSEQSBAIJ format */ 1787 ierr = MatCreate(PETSC_COMM_SELF,fact);CHKERRQ(ierr); 1788 ierr = MatSetSizes(*fact,am,am,am,am);CHKERRQ(ierr); 1789 B = *fact; 1790 ierr = MatSetType(B,MATSEQSBAIJ);CHKERRQ(ierr); 1791 ierr = MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1792 1793 b = (Mat_SeqSBAIJ*)B->data; 1794 b->singlemalloc = PETSC_FALSE; 1795 b->free_a = PETSC_TRUE; 1796 b->free_ij = PETSC_TRUE; 1797 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1798 b->j = uj; 1799 b->i = ui; 1800 b->diag = 0; 1801 b->ilen = 0; 1802 b->imax = 0; 1803 b->row = perm; 1804 b->col = perm; 1805 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1806 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1807 b->icol = iperm; 1808 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1809 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1810 ierr = PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1811 b->maxnz = b->nz = ui[am]; 1812 1813 B->factor = FACTOR_CHOLESKY; 1814 B->info.factor_mallocs = reallocs; 1815 B->info.fill_ratio_given = fill; 1816 if (ai[am] != 0) { 1817 B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1818 } else { 1819 B->info.fill_ratio_needed = 0.0; 1820 } 1821 (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1822 if (perm_identity){ 1823 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1824 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1825 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1826 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1827 } else { 1828 (*fact)->ops->solve = MatSolve_SeqSBAIJ_1; 1829 (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1830 (*fact)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1831 (*fact)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1832 } 1833 PetscFunctionReturn(0); 1834 } 1835