1 #define PETSCMAT_DLL 2 3 #include "../src/mat/impls/aij/seq/aij.h" 4 #include "../src/inline/dot.h" 5 #include "../src/inline/spops.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 12 { 13 PetscFunctionBegin; 14 15 SETERRQ(PETSC_ERR_SUP,"Code not written"); 16 #if !defined(PETSC_USE_DEBUG) 17 PetscFunctionReturn(0); 18 #endif 19 } 20 21 EXTERN_C_BEGIN 22 #undef __FUNCT__ 23 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 24 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 25 { 26 PetscFunctionBegin; 27 *flg = PETSC_TRUE; 28 PetscFunctionReturn(0); 29 } 30 EXTERN_C_END 31 32 EXTERN_C_BEGIN 33 #undef __FUNCT__ 34 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 35 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 36 { 37 PetscInt n = A->rmap->n; 38 PetscErrorCode ierr; 39 40 PetscFunctionBegin; 41 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 42 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 43 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 44 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 45 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 46 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 47 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 48 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 49 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 50 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 51 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 52 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 53 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 54 (*B)->factor = ftype; 55 PetscFunctionReturn(0); 56 } 57 EXTERN_C_END 58 59 #undef __FUNCT__ 60 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 61 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 62 { 63 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 64 IS isicol; 65 PetscErrorCode ierr; 66 const PetscInt *r,*ic; 67 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 68 PetscInt *bi,*bj,*ajtmp; 69 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 70 PetscReal f; 71 PetscInt nlnk,*lnk,k,**bi_ptr; 72 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 73 PetscBT lnkbt; 74 75 PetscFunctionBegin; 76 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 77 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 78 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 79 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 80 81 /* get new row pointers */ 82 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 83 bi[0] = 0; 84 85 /* bdiag is location of diagonal in factor */ 86 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 87 bdiag[0] = 0; 88 89 /* linked list for storing column indices of the active row */ 90 nlnk = n + 1; 91 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 92 93 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 94 95 /* initial FreeSpace size is f*(ai[n]+1) */ 96 f = info->fill; 97 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 98 current_space = free_space; 99 100 for (i=0; i<n; i++) { 101 /* copy previous fill into linked list */ 102 nzi = 0; 103 nnz = ai[r[i]+1] - ai[r[i]]; 104 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 105 ajtmp = aj + ai[r[i]]; 106 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 107 nzi += nlnk; 108 109 /* add pivot rows into linked list */ 110 row = lnk[n]; 111 while (row < i) { 112 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 113 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 114 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 115 nzi += nlnk; 116 row = lnk[row]; 117 } 118 bi[i+1] = bi[i] + nzi; 119 im[i] = nzi; 120 121 /* mark bdiag */ 122 nzbd = 0; 123 nnz = nzi; 124 k = lnk[n]; 125 while (nnz-- && k < i){ 126 nzbd++; 127 k = lnk[k]; 128 } 129 bdiag[i] = bi[i] + nzbd; 130 131 /* if free space is not available, make more free space */ 132 if (current_space->local_remaining<nzi) { 133 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 134 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 135 reallocs++; 136 } 137 138 /* copy data into free space, then initialize lnk */ 139 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 140 bi_ptr[i] = current_space->array; 141 current_space->array += nzi; 142 current_space->local_used += nzi; 143 current_space->local_remaining -= nzi; 144 } 145 #if defined(PETSC_USE_INFO) 146 if (ai[n] != 0) { 147 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 148 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 149 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 150 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 151 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 152 } else { 153 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 154 } 155 #endif 156 157 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 158 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 159 160 /* destroy list of free space and other temporary array(s) */ 161 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 162 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 163 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 164 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 165 166 /* put together the new matrix */ 167 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 168 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 169 b = (Mat_SeqAIJ*)(B)->data; 170 b->free_a = PETSC_TRUE; 171 b->free_ij = PETSC_TRUE; 172 b->singlemalloc = PETSC_FALSE; 173 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 174 b->j = bj; 175 b->i = bi; 176 b->diag = bdiag; 177 b->ilen = 0; 178 b->imax = 0; 179 b->row = isrow; 180 b->col = iscol; 181 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 182 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 183 b->icol = isicol; 184 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 185 186 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 187 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 188 b->maxnz = b->nz = bi[n] ; 189 190 (B)->factor = MAT_FACTOR_LU; 191 (B)->info.factor_mallocs = reallocs; 192 (B)->info.fill_ratio_given = f; 193 194 if (ai[n]) { 195 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 196 } else { 197 (B)->info.fill_ratio_needed = 0.0; 198 } 199 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 200 (B)->ops->solve = MatSolve_SeqAIJ; 201 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 202 /* switch to inodes if appropriate */ 203 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 204 PetscFunctionReturn(0); 205 } 206 207 /* 208 Trouble in factorization, should we dump the original matrix? 209 */ 210 #undef __FUNCT__ 211 #define __FUNCT__ "MatFactorDumpMatrix" 212 PetscErrorCode MatFactorDumpMatrix(Mat A) 213 { 214 PetscErrorCode ierr; 215 PetscTruth flg = PETSC_FALSE; 216 217 PetscFunctionBegin; 218 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 219 if (flg) { 220 PetscViewer viewer; 221 char filename[PETSC_MAX_PATH_LEN]; 222 223 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 224 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 225 ierr = MatView(A,viewer);CHKERRQ(ierr); 226 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 227 } 228 PetscFunctionReturn(0); 229 } 230 231 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 232 233 /* ----------------------------------------------------------- */ 234 #undef __FUNCT__ 235 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 236 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 237 { 238 Mat C=B; 239 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 240 IS isrow = b->row,isicol = b->icol; 241 PetscErrorCode ierr; 242 const PetscInt *r,*ic,*ics; 243 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 244 PetscInt *ajtmp,*bjtmp,nz,row,*diag_offset = b->diag,diag,*pj; 245 MatScalar *rtmp,*pc,multiplier,*v,*pv,d,*aa=a->a; 246 PetscReal rs=0.0; 247 LUShift_Ctx sctx; 248 PetscInt newshift,*ddiag; 249 250 PetscFunctionBegin; 251 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 252 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 253 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 254 ics = ic; 255 256 sctx.shift_top = 0; 257 sctx.nshift_max = 0; 258 sctx.shift_lo = 0; 259 sctx.shift_hi = 0; 260 sctx.shift_fraction = 0; 261 262 /* if both shift schemes are chosen by user, only use info->shiftpd */ 263 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 264 ddiag = a->diag; 265 sctx.shift_top = info->zeropivot; 266 for (i=0; i<n; i++) { 267 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 268 d = (aa)[ddiag[i]]; 269 rs = -PetscAbsScalar(d) - PetscRealPart(d); 270 v = aa+ai[i]; 271 nz = ai[i+1] - ai[i]; 272 for (j=0; j<nz; j++) 273 rs += PetscAbsScalar(v[j]); 274 if (rs>sctx.shift_top) sctx.shift_top = rs; 275 } 276 sctx.shift_top *= 1.1; 277 sctx.nshift_max = 5; 278 sctx.shift_lo = 0.; 279 sctx.shift_hi = 1.; 280 } 281 282 sctx.shift_amount = 0.0; 283 sctx.nshift = 0; 284 do { 285 sctx.lushift = PETSC_FALSE; 286 for (i=0; i<n; i++){ 287 nz = bi[i+1] - bi[i]; 288 bjtmp = bj + bi[i]; 289 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 290 291 /* load in initial (unfactored row) */ 292 nz = ai[r[i]+1] - ai[r[i]]; 293 ajtmp = aj + ai[r[i]]; 294 v = aa + ai[r[i]]; 295 for (j=0; j<nz; j++) { 296 rtmp[ics[ajtmp[j]]] = v[j]; 297 } 298 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 299 300 row = *bjtmp++; 301 while (row < i) { 302 pc = rtmp + row; 303 if (*pc != 0.0) { 304 pv = b->a + diag_offset[row]; 305 pj = b->j + diag_offset[row] + 1; 306 multiplier = *pc / *pv++; 307 *pc = multiplier; 308 nz = bi[row+1] - diag_offset[row] - 1; 309 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 310 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 311 } 312 row = *bjtmp++; 313 } 314 /* finished row so stick it into b->a */ 315 pv = b->a + bi[i] ; 316 pj = b->j + bi[i] ; 317 nz = bi[i+1] - bi[i]; 318 diag = diag_offset[i] - bi[i]; 319 rs = -PetscAbsScalar(pv[diag]); 320 for (j=0; j<nz; j++) { 321 pv[j] = rtmp[pj[j]]; 322 rs += PetscAbsScalar(pv[j]); 323 } 324 325 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 326 sctx.rs = rs; 327 sctx.pv = pv[diag]; 328 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 329 if (newshift == 1) break; 330 } 331 332 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 333 /* 334 * if no shift in this attempt & shifting & started shifting & can refine, 335 * then try lower shift 336 */ 337 sctx.shift_hi = sctx.shift_fraction; 338 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 339 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 340 sctx.lushift = PETSC_TRUE; 341 sctx.nshift++; 342 } 343 } while (sctx.lushift); 344 345 /* invert diagonal entries for simplier triangular solves */ 346 for (i=0; i<n; i++) { 347 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 348 } 349 ierr = PetscFree(rtmp);CHKERRQ(ierr); 350 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 351 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 352 if (b->inode.use) { 353 C->ops->solve = MatSolve_Inode; 354 } else { 355 PetscTruth row_identity, col_identity; 356 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 357 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 358 if (row_identity && col_identity) { 359 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 360 } else { 361 C->ops->solve = MatSolve_SeqAIJ; 362 } 363 } 364 C->ops->solveadd = MatSolveAdd_SeqAIJ; 365 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 366 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 367 C->ops->matsolve = MatMatSolve_SeqAIJ; 368 C->assembled = PETSC_TRUE; 369 C->preallocated = PETSC_TRUE; 370 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 371 if (sctx.nshift){ 372 if (info->shiftpd) { 373 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 374 } else if (info->shiftnz) { 375 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 376 } 377 } 378 PetscFunctionReturn(0); 379 } 380 381 /* 382 This routine implements inplace ILU(0) with row or/and column permutations. 383 Input: 384 A - original matrix 385 Output; 386 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 387 a->j (col index) is permuted by the inverse of colperm, then sorted 388 a->a reordered accordingly with a->j 389 a->diag (ptr to diagonal elements) is updated. 390 */ 391 #undef __FUNCT__ 392 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 393 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 394 { 395 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 396 IS isrow = a->row,isicol = a->icol; 397 PetscErrorCode ierr; 398 const PetscInt *r,*ic,*ics; 399 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 400 PetscInt *ajtmp,nz,row; 401 PetscInt *diag = a->diag,nbdiag,*pj; 402 PetscScalar *rtmp,*pc,multiplier,d; 403 MatScalar *v,*pv; 404 PetscReal rs; 405 LUShift_Ctx sctx; 406 PetscInt newshift; 407 408 PetscFunctionBegin; 409 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 410 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 411 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 412 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 413 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 414 ics = ic; 415 416 sctx.shift_top = 0; 417 sctx.nshift_max = 0; 418 sctx.shift_lo = 0; 419 sctx.shift_hi = 0; 420 sctx.shift_fraction = 0; 421 422 /* if both shift schemes are chosen by user, only use info->shiftpd */ 423 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 424 sctx.shift_top = 0; 425 for (i=0; i<n; i++) { 426 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 427 d = (a->a)[diag[i]]; 428 rs = -PetscAbsScalar(d) - PetscRealPart(d); 429 v = a->a+ai[i]; 430 nz = ai[i+1] - ai[i]; 431 for (j=0; j<nz; j++) 432 rs += PetscAbsScalar(v[j]); 433 if (rs>sctx.shift_top) sctx.shift_top = rs; 434 } 435 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 436 sctx.shift_top *= 1.1; 437 sctx.nshift_max = 5; 438 sctx.shift_lo = 0.; 439 sctx.shift_hi = 1.; 440 } 441 442 sctx.shift_amount = 0; 443 sctx.nshift = 0; 444 do { 445 sctx.lushift = PETSC_FALSE; 446 for (i=0; i<n; i++){ 447 /* load in initial unfactored row */ 448 nz = ai[r[i]+1] - ai[r[i]]; 449 ajtmp = aj + ai[r[i]]; 450 v = a->a + ai[r[i]]; 451 /* sort permuted ajtmp and values v accordingly */ 452 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 453 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 454 455 diag[r[i]] = ai[r[i]]; 456 for (j=0; j<nz; j++) { 457 rtmp[ajtmp[j]] = v[j]; 458 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 459 } 460 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 461 462 row = *ajtmp++; 463 while (row < i) { 464 pc = rtmp + row; 465 if (*pc != 0.0) { 466 pv = a->a + diag[r[row]]; 467 pj = aj + diag[r[row]] + 1; 468 469 multiplier = *pc / *pv++; 470 *pc = multiplier; 471 nz = ai[r[row]+1] - diag[r[row]] - 1; 472 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 473 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 474 } 475 row = *ajtmp++; 476 } 477 /* finished row so overwrite it onto a->a */ 478 pv = a->a + ai[r[i]] ; 479 pj = aj + ai[r[i]] ; 480 nz = ai[r[i]+1] - ai[r[i]]; 481 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 482 483 rs = 0.0; 484 for (j=0; j<nz; j++) { 485 pv[j] = rtmp[pj[j]]; 486 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 487 } 488 489 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 490 sctx.rs = rs; 491 sctx.pv = pv[nbdiag]; 492 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 493 if (newshift == 1) break; 494 } 495 496 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 497 /* 498 * if no shift in this attempt & shifting & started shifting & can refine, 499 * then try lower shift 500 */ 501 sctx.shift_hi = sctx.shift_fraction; 502 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 503 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 504 sctx.lushift = PETSC_TRUE; 505 sctx.nshift++; 506 } 507 } while (sctx.lushift); 508 509 /* invert diagonal entries for simplier triangular solves */ 510 for (i=0; i<n; i++) { 511 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 512 } 513 514 ierr = PetscFree(rtmp);CHKERRQ(ierr); 515 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 516 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 517 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 518 A->ops->solveadd = MatSolveAdd_SeqAIJ; 519 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 520 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 521 A->assembled = PETSC_TRUE; 522 A->preallocated = PETSC_TRUE; 523 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 524 if (sctx.nshift){ 525 if (info->shiftpd) { 526 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 527 } else if (info->shiftnz) { 528 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 529 } 530 } 531 PetscFunctionReturn(0); 532 } 533 534 /* ----------------------------------------------------------- */ 535 #undef __FUNCT__ 536 #define __FUNCT__ "MatLUFactor_SeqAIJ" 537 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 538 { 539 PetscErrorCode ierr; 540 Mat C; 541 542 PetscFunctionBegin; 543 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 544 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 545 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 546 A->ops->solve = C->ops->solve; 547 A->ops->solvetranspose = C->ops->solvetranspose; 548 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 549 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 550 PetscFunctionReturn(0); 551 } 552 /* ----------------------------------------------------------- */ 553 554 #undef __FUNCT__ 555 #define __FUNCT__ "MatSolve_SeqAIJ" 556 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 557 { 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 IS iscol = a->col,isrow = a->row; 560 PetscErrorCode ierr; 561 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 562 PetscInt nz; 563 const PetscInt *rout,*cout,*r,*c; 564 PetscScalar *x,*tmp,*tmps,sum; 565 const PetscScalar *b; 566 const MatScalar *aa = a->a,*v; 567 568 PetscFunctionBegin; 569 if (!n) PetscFunctionReturn(0); 570 571 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 572 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 573 tmp = a->solve_work; 574 575 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 576 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 577 578 /* forward solve the lower triangular */ 579 tmp[0] = b[*r++]; 580 tmps = tmp; 581 for (i=1; i<n; i++) { 582 v = aa + ai[i] ; 583 vi = aj + ai[i] ; 584 nz = a->diag[i] - ai[i]; 585 sum = b[*r++]; 586 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 587 tmp[i] = sum; 588 } 589 590 /* backward solve the upper triangular */ 591 for (i=n-1; i>=0; i--){ 592 v = aa + a->diag[i] + 1; 593 vi = aj + a->diag[i] + 1; 594 nz = ai[i+1] - a->diag[i] - 1; 595 sum = tmp[i]; 596 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 597 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 598 } 599 600 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 601 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 602 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 603 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 604 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 605 PetscFunctionReturn(0); 606 } 607 608 #undef __FUNCT__ 609 #define __FUNCT__ "MatMatSolve_SeqAIJ" 610 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 611 { 612 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 613 IS iscol = a->col,isrow = a->row; 614 PetscErrorCode ierr; 615 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 616 PetscInt nz,neq; 617 const PetscInt *rout,*cout,*r,*c; 618 PetscScalar *x,*b,*tmp,*tmps,sum; 619 const MatScalar *aa = a->a,*v; 620 PetscTruth bisdense,xisdense; 621 622 PetscFunctionBegin; 623 if (!n) PetscFunctionReturn(0); 624 625 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 626 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 627 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 628 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 629 630 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 631 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 632 633 tmp = a->solve_work; 634 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 635 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 636 637 for (neq=0; neq<B->cmap->n; neq++){ 638 /* forward solve the lower triangular */ 639 tmp[0] = b[r[0]]; 640 tmps = tmp; 641 for (i=1; i<n; i++) { 642 v = aa + ai[i] ; 643 vi = aj + ai[i] ; 644 nz = a->diag[i] - ai[i]; 645 sum = b[r[i]]; 646 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 647 tmp[i] = sum; 648 } 649 /* backward solve the upper triangular */ 650 for (i=n-1; i>=0; i--){ 651 v = aa + a->diag[i] + 1; 652 vi = aj + a->diag[i] + 1; 653 nz = ai[i+1] - a->diag[i] - 1; 654 sum = tmp[i]; 655 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 656 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 657 } 658 659 b += n; 660 x += n; 661 } 662 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 663 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 664 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 665 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 666 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 667 PetscFunctionReturn(0); 668 } 669 670 #undef __FUNCT__ 671 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 672 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 673 { 674 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 675 IS iscol = a->col,isrow = a->row; 676 PetscErrorCode ierr; 677 const PetscInt *r,*c,*rout,*cout; 678 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 679 PetscInt nz,row; 680 PetscScalar *x,*b,*tmp,*tmps,sum; 681 const MatScalar *aa = a->a,*v; 682 683 PetscFunctionBegin; 684 if (!n) PetscFunctionReturn(0); 685 686 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 tmp = a->solve_work; 689 690 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 691 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 692 693 /* forward solve the lower triangular */ 694 tmp[0] = b[*r++]; 695 tmps = tmp; 696 for (row=1; row<n; row++) { 697 i = rout[row]; /* permuted row */ 698 v = aa + ai[i] ; 699 vi = aj + ai[i] ; 700 nz = a->diag[i] - ai[i]; 701 sum = b[*r++]; 702 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 703 tmp[row] = sum; 704 } 705 706 /* backward solve the upper triangular */ 707 for (row=n-1; row>=0; row--){ 708 i = rout[row]; /* permuted row */ 709 v = aa + a->diag[i] + 1; 710 vi = aj + a->diag[i] + 1; 711 nz = ai[i+1] - a->diag[i] - 1; 712 sum = tmp[row]; 713 SPARSEDENSEMDOT(sum,tmps,v,vi,nz); 714 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 715 } 716 717 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 718 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 719 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 720 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 721 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 722 PetscFunctionReturn(0); 723 } 724 725 /* ----------------------------------------------------------- */ 726 #undef __FUNCT__ 727 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 728 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 729 { 730 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 731 PetscErrorCode ierr; 732 PetscInt n = A->rmap->n; 733 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 734 PetscScalar *x; 735 const PetscScalar *b; 736 const MatScalar *aa = a->a; 737 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 738 PetscInt adiag_i,i,nz,ai_i; 739 const MatScalar *v; 740 PetscScalar sum; 741 #endif 742 743 PetscFunctionBegin; 744 if (!n) PetscFunctionReturn(0); 745 746 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 747 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 748 749 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 750 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 751 #else 752 /* forward solve the lower triangular */ 753 x[0] = b[0]; 754 for (i=1; i<n; i++) { 755 ai_i = ai[i]; 756 v = aa + ai_i; 757 vi = aj + ai_i; 758 nz = adiag[i] - ai_i; 759 sum = b[i]; 760 while (nz--) sum -= *v++ * x[*vi++]; 761 x[i] = sum; 762 } 763 764 /* backward solve the upper triangular */ 765 for (i=n-1; i>=0; i--){ 766 adiag_i = adiag[i]; 767 v = aa + adiag_i + 1; 768 vi = aj + adiag_i + 1; 769 nz = ai[i+1] - adiag_i - 1; 770 sum = x[i]; 771 while (nz--) sum -= *v++ * x[*vi++]; 772 x[i] = sum*aa[adiag_i]; 773 } 774 #endif 775 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 776 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 777 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 783 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 784 { 785 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 786 IS iscol = a->col,isrow = a->row; 787 PetscErrorCode ierr; 788 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 789 PetscInt nz; 790 const PetscInt *rout,*cout,*r,*c; 791 PetscScalar *x,*b,*tmp,sum; 792 const MatScalar *aa = a->a,*v; 793 794 PetscFunctionBegin; 795 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 796 797 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 798 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 799 tmp = a->solve_work; 800 801 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 802 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 803 804 /* forward solve the lower triangular */ 805 tmp[0] = b[*r++]; 806 for (i=1; i<n; i++) { 807 v = aa + ai[i] ; 808 vi = aj + ai[i] ; 809 nz = a->diag[i] - ai[i]; 810 sum = b[*r++]; 811 while (nz--) sum -= *v++ * tmp[*vi++ ]; 812 tmp[i] = sum; 813 } 814 815 /* backward solve the upper triangular */ 816 for (i=n-1; i>=0; i--){ 817 v = aa + a->diag[i] + 1; 818 vi = aj + a->diag[i] + 1; 819 nz = ai[i+1] - a->diag[i] - 1; 820 sum = tmp[i]; 821 while (nz--) sum -= *v++ * tmp[*vi++ ]; 822 tmp[i] = sum*aa[a->diag[i]]; 823 x[*c--] += tmp[i]; 824 } 825 826 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 827 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 828 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 829 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 830 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 831 832 PetscFunctionReturn(0); 833 } 834 /* -------------------------------------------------------------------*/ 835 #undef __FUNCT__ 836 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 837 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 838 { 839 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 840 IS iscol = a->col,isrow = a->row; 841 PetscErrorCode ierr; 842 const PetscInt *rout,*cout,*r,*c; 843 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 844 PetscInt nz,*diag = a->diag; 845 PetscScalar *x,*b,*tmp,s1; 846 const MatScalar *aa = a->a,*v; 847 848 PetscFunctionBegin; 849 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 850 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 851 tmp = a->solve_work; 852 853 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 854 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 855 856 /* copy the b into temp work space according to permutation */ 857 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 858 859 /* forward solve the U^T */ 860 for (i=0; i<n; i++) { 861 v = aa + diag[i] ; 862 vi = aj + diag[i] + 1; 863 nz = ai[i+1] - diag[i] - 1; 864 s1 = tmp[i]; 865 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 866 while (nz--) { 867 tmp[*vi++ ] -= (*v++)*s1; 868 } 869 tmp[i] = s1; 870 } 871 872 /* backward solve the L^T */ 873 for (i=n-1; i>=0; i--){ 874 v = aa + diag[i] - 1 ; 875 vi = aj + diag[i] - 1 ; 876 nz = diag[i] - ai[i]; 877 s1 = tmp[i]; 878 while (nz--) { 879 tmp[*vi-- ] -= (*v--)*s1; 880 } 881 } 882 883 /* copy tmp into x according to permutation */ 884 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 885 886 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 887 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 888 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 889 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 890 891 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 897 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 898 { 899 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 900 IS iscol = a->col,isrow = a->row; 901 PetscErrorCode ierr; 902 const PetscInt *r,*c,*rout,*cout; 903 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 904 PetscInt nz,*diag = a->diag; 905 PetscScalar *x,*b,*tmp; 906 const MatScalar *aa = a->a,*v; 907 908 PetscFunctionBegin; 909 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 910 911 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 912 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 913 tmp = a->solve_work; 914 915 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 916 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 917 918 /* copy the b into temp work space according to permutation */ 919 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 920 921 /* forward solve the U^T */ 922 for (i=0; i<n; i++) { 923 v = aa + diag[i] ; 924 vi = aj + diag[i] + 1; 925 nz = ai[i+1] - diag[i] - 1; 926 tmp[i] *= *v++; 927 while (nz--) { 928 tmp[*vi++ ] -= (*v++)*tmp[i]; 929 } 930 } 931 932 /* backward solve the L^T */ 933 for (i=n-1; i>=0; i--){ 934 v = aa + diag[i] - 1 ; 935 vi = aj + diag[i] - 1 ; 936 nz = diag[i] - ai[i]; 937 while (nz--) { 938 tmp[*vi-- ] -= (*v--)*tmp[i]; 939 } 940 } 941 942 /* copy tmp into x according to permutation */ 943 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 944 945 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 946 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 947 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 948 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 949 950 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 951 PetscFunctionReturn(0); 952 } 953 /* ----------------------------------------------------------------*/ 954 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 955 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption); 956 957 #undef __FUNCT__ 958 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 959 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 960 { 961 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 962 IS isicol; 963 PetscErrorCode ierr; 964 const PetscInt *r,*ic; 965 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 966 PetscInt *bi,*cols,nnz,*cols_lvl; 967 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 968 PetscInt i,levels,diagonal_fill; 969 PetscTruth col_identity,row_identity; 970 PetscReal f; 971 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 972 PetscBT lnkbt; 973 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 974 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 975 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 976 PetscTruth missing; 977 978 PetscFunctionBegin; 979 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 980 f = info->fill; 981 levels = (PetscInt)info->levels; 982 diagonal_fill = (PetscInt)info->diagonal_fill; 983 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 984 985 /* special case that simply copies fill pattern */ 986 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 987 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 988 if (!levels && row_identity && col_identity) { 989 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES);CHKERRQ(ierr); 990 fact->factor = MAT_FACTOR_ILU; 991 (fact)->info.factor_mallocs = 0; 992 (fact)->info.fill_ratio_given = info->fill; 993 (fact)->info.fill_ratio_needed = 1.0; 994 b = (Mat_SeqAIJ*)(fact)->data; 995 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 996 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 997 b->row = isrow; 998 b->col = iscol; 999 b->icol = isicol; 1000 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1001 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1002 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1003 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1004 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1005 PetscFunctionReturn(0); 1006 } 1007 1008 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1009 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1010 1011 /* get new row pointers */ 1012 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1013 bi[0] = 0; 1014 /* bdiag is location of diagonal in factor */ 1015 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1016 bdiag[0] = 0; 1017 1018 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1019 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1020 1021 /* create a linked list for storing column indices of the active row */ 1022 nlnk = n + 1; 1023 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1024 1025 /* initial FreeSpace size is f*(ai[n]+1) */ 1026 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1027 current_space = free_space; 1028 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1029 current_space_lvl = free_space_lvl; 1030 1031 for (i=0; i<n; i++) { 1032 nzi = 0; 1033 /* copy current row into linked list */ 1034 nnz = ai[r[i]+1] - ai[r[i]]; 1035 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1036 cols = aj + ai[r[i]]; 1037 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1038 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1039 nzi += nlnk; 1040 1041 /* make sure diagonal entry is included */ 1042 if (diagonal_fill && lnk[i] == -1) { 1043 fm = n; 1044 while (lnk[fm] < i) fm = lnk[fm]; 1045 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1046 lnk[fm] = i; 1047 lnk_lvl[i] = 0; 1048 nzi++; dcount++; 1049 } 1050 1051 /* add pivot rows into the active row */ 1052 nzbd = 0; 1053 prow = lnk[n]; 1054 while (prow < i) { 1055 nnz = bdiag[prow]; 1056 cols = bj_ptr[prow] + nnz + 1; 1057 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1058 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1059 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1060 nzi += nlnk; 1061 prow = lnk[prow]; 1062 nzbd++; 1063 } 1064 bdiag[i] = nzbd; 1065 bi[i+1] = bi[i] + nzi; 1066 1067 /* if free space is not available, make more free space */ 1068 if (current_space->local_remaining<nzi) { 1069 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1070 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1071 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1072 reallocs++; 1073 } 1074 1075 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1076 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1077 bj_ptr[i] = current_space->array; 1078 bjlvl_ptr[i] = current_space_lvl->array; 1079 1080 /* make sure the active row i has diagonal entry */ 1081 if (*(bj_ptr[i]+bdiag[i]) != i) { 1082 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1083 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1084 } 1085 1086 current_space->array += nzi; 1087 current_space->local_used += nzi; 1088 current_space->local_remaining -= nzi; 1089 current_space_lvl->array += nzi; 1090 current_space_lvl->local_used += nzi; 1091 current_space_lvl->local_remaining -= nzi; 1092 } 1093 1094 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1095 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1096 1097 /* destroy list of free space and other temporary arrays */ 1098 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1099 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 1100 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1101 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1102 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1103 1104 #if defined(PETSC_USE_INFO) 1105 { 1106 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1107 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1108 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1109 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1110 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1111 if (diagonal_fill) { 1112 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1113 } 1114 } 1115 #endif 1116 1117 /* put together the new matrix */ 1118 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1119 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1120 b = (Mat_SeqAIJ*)(fact)->data; 1121 b->free_a = PETSC_TRUE; 1122 b->free_ij = PETSC_TRUE; 1123 b->singlemalloc = PETSC_FALSE; 1124 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1125 b->j = bj; 1126 b->i = bi; 1127 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1128 b->diag = bdiag; 1129 b->ilen = 0; 1130 b->imax = 0; 1131 b->row = isrow; 1132 b->col = iscol; 1133 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1134 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1135 b->icol = isicol; 1136 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1137 /* In b structure: Free imax, ilen, old a, old j. 1138 Allocate bdiag, solve_work, new a, new j */ 1139 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1140 b->maxnz = b->nz = bi[n] ; 1141 (fact)->info.factor_mallocs = reallocs; 1142 (fact)->info.fill_ratio_given = f; 1143 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1144 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1145 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1152 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1153 { 1154 Mat C = B; 1155 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1156 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1157 IS ip=b->row,iip = b->icol; 1158 PetscErrorCode ierr; 1159 const PetscInt *rip,*riip; 1160 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1161 PetscInt *ai=a->i,*aj=a->j; 1162 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1163 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1164 PetscReal zeropivot,rs,shiftnz; 1165 PetscReal shiftpd; 1166 ChShift_Ctx sctx; 1167 PetscInt newshift; 1168 PetscTruth perm_identity; 1169 1170 PetscFunctionBegin; 1171 1172 shiftnz = info->shiftnz; 1173 shiftpd = info->shiftpd; 1174 zeropivot = info->zeropivot; 1175 1176 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1177 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1178 1179 /* initialization */ 1180 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1181 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1182 jl = il + mbs; 1183 rtmp = (MatScalar*)(jl + mbs); 1184 1185 sctx.shift_amount = 0; 1186 sctx.nshift = 0; 1187 do { 1188 sctx.chshift = PETSC_FALSE; 1189 for (i=0; i<mbs; i++) { 1190 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1191 } 1192 1193 for (k = 0; k<mbs; k++){ 1194 bval = ba + bi[k]; 1195 /* initialize k-th row by the perm[k]-th row of A */ 1196 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1197 for (j = jmin; j < jmax; j++){ 1198 col = riip[aj[j]]; 1199 if (col >= k){ /* only take upper triangular entry */ 1200 rtmp[col] = aa[j]; 1201 *bval++ = 0.0; /* for in-place factorization */ 1202 } 1203 } 1204 /* shift the diagonal of the matrix */ 1205 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1206 1207 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1208 dk = rtmp[k]; 1209 i = jl[k]; /* first row to be added to k_th row */ 1210 1211 while (i < k){ 1212 nexti = jl[i]; /* next row to be added to k_th row */ 1213 1214 /* compute multiplier, update diag(k) and U(i,k) */ 1215 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1216 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1217 dk += uikdi*ba[ili]; 1218 ba[ili] = uikdi; /* -U(i,k) */ 1219 1220 /* add multiple of row i to k-th row */ 1221 jmin = ili + 1; jmax = bi[i+1]; 1222 if (jmin < jmax){ 1223 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1224 /* update il and jl for row i */ 1225 il[i] = jmin; 1226 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1227 } 1228 i = nexti; 1229 } 1230 1231 /* shift the diagonals when zero pivot is detected */ 1232 /* compute rs=sum of abs(off-diagonal) */ 1233 rs = 0.0; 1234 jmin = bi[k]+1; 1235 nz = bi[k+1] - jmin; 1236 bcol = bj + jmin; 1237 while (nz--){ 1238 rs += PetscAbsScalar(rtmp[*bcol]); 1239 bcol++; 1240 } 1241 1242 sctx.rs = rs; 1243 sctx.pv = dk; 1244 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1245 1246 if (newshift == 1) { 1247 if (!sctx.shift_amount) { 1248 sctx.shift_amount = 1e-5; 1249 } 1250 break; 1251 } 1252 1253 /* copy data into U(k,:) */ 1254 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1255 jmin = bi[k]+1; jmax = bi[k+1]; 1256 if (jmin < jmax) { 1257 for (j=jmin; j<jmax; j++){ 1258 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1259 } 1260 /* add the k-th row into il and jl */ 1261 il[k] = jmin; 1262 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1263 } 1264 } 1265 } while (sctx.chshift); 1266 ierr = PetscFree(il);CHKERRQ(ierr); 1267 1268 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1269 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1270 1271 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1272 if (perm_identity){ 1273 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1274 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1275 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1276 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1277 } else { 1278 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1279 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1280 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1281 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1282 } 1283 1284 C->assembled = PETSC_TRUE; 1285 C->preallocated = PETSC_TRUE; 1286 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1287 if (sctx.nshift){ 1288 if (shiftnz) { 1289 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1290 } else if (shiftpd) { 1291 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1292 } 1293 } 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #undef __FUNCT__ 1298 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1299 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1300 { 1301 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1302 Mat_SeqSBAIJ *b; 1303 PetscErrorCode ierr; 1304 PetscTruth perm_identity,missing; 1305 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1306 const PetscInt *rip,*riip; 1307 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1308 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1309 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1310 PetscReal fill=info->fill,levels=info->levels; 1311 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1312 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1313 PetscBT lnkbt; 1314 IS iperm; 1315 1316 PetscFunctionBegin; 1317 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1318 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1319 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1320 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1321 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1322 1323 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1324 ui[0] = 0; 1325 1326 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1327 if (!levels && perm_identity) { 1328 1329 for (i=0; i<am; i++) { 1330 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1331 } 1332 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1333 cols = uj; 1334 for (i=0; i<am; i++) { 1335 aj = a->j + a->diag[i]; 1336 ncols = ui[i+1] - ui[i]; 1337 for (j=0; j<ncols; j++) *cols++ = *aj++; 1338 } 1339 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1340 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1341 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1342 1343 /* initialization */ 1344 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1345 1346 /* jl: linked list for storing indices of the pivot rows 1347 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1348 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1349 il = jl + am; 1350 uj_ptr = (PetscInt**)(il + am); 1351 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1352 for (i=0; i<am; i++){ 1353 jl[i] = am; il[i] = 0; 1354 } 1355 1356 /* create and initialize a linked list for storing column indices of the active row k */ 1357 nlnk = am + 1; 1358 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1359 1360 /* initial FreeSpace size is fill*(ai[am]+1) */ 1361 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1362 current_space = free_space; 1363 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1364 current_space_lvl = free_space_lvl; 1365 1366 for (k=0; k<am; k++){ /* for each active row k */ 1367 /* initialize lnk by the column indices of row rip[k] of A */ 1368 nzk = 0; 1369 ncols = ai[rip[k]+1] - ai[rip[k]]; 1370 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1371 ncols_upper = 0; 1372 for (j=0; j<ncols; j++){ 1373 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1374 if (riip[i] >= k){ /* only take upper triangular entry */ 1375 ajtmp[ncols_upper] = i; 1376 ncols_upper++; 1377 } 1378 } 1379 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1380 nzk += nlnk; 1381 1382 /* update lnk by computing fill-in for each pivot row to be merged in */ 1383 prow = jl[k]; /* 1st pivot row */ 1384 1385 while (prow < k){ 1386 nextprow = jl[prow]; 1387 1388 /* merge prow into k-th row */ 1389 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1390 jmax = ui[prow+1]; 1391 ncols = jmax-jmin; 1392 i = jmin - ui[prow]; 1393 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1394 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 1395 j = *(uj - 1); 1396 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 1397 nzk += nlnk; 1398 1399 /* update il and jl for prow */ 1400 if (jmin < jmax){ 1401 il[prow] = jmin; 1402 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 1403 } 1404 prow = nextprow; 1405 } 1406 1407 /* if free space is not available, make more free space */ 1408 if (current_space->local_remaining<nzk) { 1409 i = am - k + 1; /* num of unfactored rows */ 1410 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1411 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1412 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 1413 reallocs++; 1414 } 1415 1416 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1417 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 1418 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1419 1420 /* add the k-th row into il and jl */ 1421 if (nzk > 1){ 1422 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1423 jl[k] = jl[i]; jl[i] = k; 1424 il[k] = ui[k] + 1; 1425 } 1426 uj_ptr[k] = current_space->array; 1427 uj_lvl_ptr[k] = current_space_lvl->array; 1428 1429 current_space->array += nzk; 1430 current_space->local_used += nzk; 1431 current_space->local_remaining -= nzk; 1432 1433 current_space_lvl->array += nzk; 1434 current_space_lvl->local_used += nzk; 1435 current_space_lvl->local_remaining -= nzk; 1436 1437 ui[k+1] = ui[k] + nzk; 1438 } 1439 1440 #if defined(PETSC_USE_INFO) 1441 if (ai[am] != 0) { 1442 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 1443 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1444 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1445 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1446 } else { 1447 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1448 } 1449 #endif 1450 1451 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1452 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1453 ierr = PetscFree(jl);CHKERRQ(ierr); 1454 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 1455 1456 /* destroy list of free space and other temporary array(s) */ 1457 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1458 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1459 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1460 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1461 1462 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 1463 1464 /* put together the new matrix in MATSEQSBAIJ format */ 1465 1466 b = (Mat_SeqSBAIJ*)(fact)->data; 1467 b->singlemalloc = PETSC_FALSE; 1468 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1469 b->j = uj; 1470 b->i = ui; 1471 b->diag = 0; 1472 b->ilen = 0; 1473 b->imax = 0; 1474 b->row = perm; 1475 b->col = perm; 1476 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1477 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1478 b->icol = iperm; 1479 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1480 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1481 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1482 b->maxnz = b->nz = ui[am]; 1483 b->free_a = PETSC_TRUE; 1484 b->free_ij = PETSC_TRUE; 1485 1486 (fact)->info.factor_mallocs = reallocs; 1487 (fact)->info.fill_ratio_given = fill; 1488 if (ai[am] != 0) { 1489 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1490 } else { 1491 (fact)->info.fill_ratio_needed = 0.0; 1492 } 1493 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 1499 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1500 { 1501 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1502 Mat_SeqSBAIJ *b; 1503 PetscErrorCode ierr; 1504 PetscTruth perm_identity; 1505 PetscReal fill = info->fill; 1506 const PetscInt *rip,*riip; 1507 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 1508 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 1509 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 1510 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1511 PetscBT lnkbt; 1512 IS iperm; 1513 1514 PetscFunctionBegin; 1515 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1516 /* check whether perm is the identity mapping */ 1517 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1518 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1519 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1520 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1521 1522 /* initialization */ 1523 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1524 ui[0] = 0; 1525 1526 /* jl: linked list for storing indices of the pivot rows 1527 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1528 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1529 il = jl + am; 1530 cols = il + am; 1531 ui_ptr = (PetscInt**)(cols + am); 1532 for (i=0; i<am; i++){ 1533 jl[i] = am; il[i] = 0; 1534 } 1535 1536 /* create and initialize a linked list for storing column indices of the active row k */ 1537 nlnk = am + 1; 1538 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1539 1540 /* initial FreeSpace size is fill*(ai[am]+1) */ 1541 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1542 current_space = free_space; 1543 1544 for (k=0; k<am; k++){ /* for each active row k */ 1545 /* initialize lnk by the column indices of row rip[k] of A */ 1546 nzk = 0; 1547 ncols = ai[rip[k]+1] - ai[rip[k]]; 1548 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1549 ncols_upper = 0; 1550 for (j=0; j<ncols; j++){ 1551 i = riip[*(aj + ai[rip[k]] + j)]; 1552 if (i >= k){ /* only take upper triangular entry */ 1553 cols[ncols_upper] = i; 1554 ncols_upper++; 1555 } 1556 } 1557 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1558 nzk += nlnk; 1559 1560 /* update lnk by computing fill-in for each pivot row to be merged in */ 1561 prow = jl[k]; /* 1st pivot row */ 1562 1563 while (prow < k){ 1564 nextprow = jl[prow]; 1565 /* merge prow into k-th row */ 1566 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 1567 jmax = ui[prow+1]; 1568 ncols = jmax-jmin; 1569 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 1570 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1571 nzk += nlnk; 1572 1573 /* update il and jl for prow */ 1574 if (jmin < jmax){ 1575 il[prow] = jmin; 1576 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 1577 } 1578 prow = nextprow; 1579 } 1580 1581 /* if free space is not available, make more free space */ 1582 if (current_space->local_remaining<nzk) { 1583 i = am - k + 1; /* num of unfactored rows */ 1584 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 1585 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 1586 reallocs++; 1587 } 1588 1589 /* copy data into free space, then initialize lnk */ 1590 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 1591 1592 /* add the k-th row into il and jl */ 1593 if (nzk-1 > 0){ 1594 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 1595 jl[k] = jl[i]; jl[i] = k; 1596 il[k] = ui[k] + 1; 1597 } 1598 ui_ptr[k] = current_space->array; 1599 current_space->array += nzk; 1600 current_space->local_used += nzk; 1601 current_space->local_remaining -= nzk; 1602 1603 ui[k+1] = ui[k] + nzk; 1604 } 1605 1606 #if defined(PETSC_USE_INFO) 1607 if (ai[am] != 0) { 1608 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 1609 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 1610 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1611 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 1612 } else { 1613 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 1614 } 1615 #endif 1616 1617 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 1618 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 1619 ierr = PetscFree(jl);CHKERRQ(ierr); 1620 1621 /* destroy list of free space and other temporary array(s) */ 1622 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1623 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 1624 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1625 1626 /* put together the new matrix in MATSEQSBAIJ format */ 1627 1628 b = (Mat_SeqSBAIJ*)(fact)->data; 1629 b->singlemalloc = PETSC_FALSE; 1630 b->free_a = PETSC_TRUE; 1631 b->free_ij = PETSC_TRUE; 1632 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 1633 b->j = uj; 1634 b->i = ui; 1635 b->diag = 0; 1636 b->ilen = 0; 1637 b->imax = 0; 1638 b->row = perm; 1639 b->col = perm; 1640 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1641 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 1642 b->icol = iperm; 1643 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 1644 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1645 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1646 b->maxnz = b->nz = ui[am]; 1647 1648 (fact)->info.factor_mallocs = reallocs; 1649 (fact)->info.fill_ratio_given = fill; 1650 if (ai[am] != 0) { 1651 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 1652 } else { 1653 (fact)->info.fill_ratio_needed = 0.0; 1654 } 1655 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 1656 PetscFunctionReturn(0); 1657 } 1658 1659 #undef __FUNCT__ 1660 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_iludt" 1661 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_iludt(Mat A,Vec bb,Vec xx) 1662 { 1663 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1664 PetscErrorCode ierr; 1665 PetscInt n = A->rmap->n; 1666 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 1667 PetscScalar *x,sum; 1668 const PetscScalar *b; 1669 const MatScalar *aa = a->a,*v; 1670 PetscInt i,nz; 1671 1672 PetscFunctionBegin; 1673 if (!n) PetscFunctionReturn(0); 1674 1675 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1676 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1677 1678 /* forward solve the lower triangular */ 1679 x[0] = b[0]; 1680 v = aa; 1681 vi = aj; 1682 for (i=1; i<n; i++) { 1683 nz = ai[i+1] - ai[i]; 1684 sum = b[i]; 1685 while (nz--) sum -= *v++ * x[*vi++]; 1686 x[i] = sum; 1687 } 1688 1689 /* backward solve the upper triangular */ 1690 v = aa + adiag[n] + 1; 1691 vi = aj + adiag[n] + 1; 1692 for (i=n-1; i>=0; i--){ 1693 nz = adiag[i] - adiag[i+1] - 1; 1694 sum = x[i]; 1695 while (nz--) sum -= *v++ * x[*vi++]; 1696 x[i] = sum*aa[adiag[i]]; 1697 v++; vi++; 1698 } 1699 1700 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1701 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1702 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatSolve_SeqAIJ_iludt" 1708 PetscErrorCode MatSolve_SeqAIJ_iludt(Mat A,Vec bb,Vec xx) 1709 { 1710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1711 IS iscol = a->col,isrow = a->row; 1712 PetscErrorCode ierr; 1713 PetscInt i,n=A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag=a->diag; 1714 PetscInt nz; 1715 const PetscInt *rout,*cout,*r,*c; 1716 PetscScalar *x,*tmp,*tmps; 1717 const PetscScalar *b; 1718 const MatScalar *aa = a->a,*v; 1719 1720 PetscFunctionBegin; 1721 if (!n) PetscFunctionReturn(0); 1722 1723 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1724 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1725 tmp = a->solve_work; 1726 1727 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1728 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1729 1730 /* forward solve the lower triangular */ 1731 tmp[0] = b[*r++]; 1732 tmps = tmp; 1733 v = aa; 1734 vi = aj; 1735 for (i=1; i<n; i++) { 1736 nz = ai[i+1] - ai[i]; 1737 tmp[i] = b[*r++]; 1738 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1739 v += nz; vi += nz; 1740 } 1741 1742 /* backward solve the upper triangular */ 1743 v = aa + adiag[n] + 1; 1744 vi = aj + adiag[n] + 1; 1745 for (i=n-1; i>=0; i--){ 1746 nz = adiag[i] - adiag[i+1] - 1; 1747 SPARSEDENSEMDOT(tmp[i],tmps,v,vi,nz); 1748 x[*c--] = tmp[i] = tmp[i]*aa[adiag[i]]; 1749 v += nz+1; vi += nz+1; 1750 } 1751 1752 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1753 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1754 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1755 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1756 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 1757 PetscFunctionReturn(0); 1758 } 1759 1760 #undef __FUNCT__ 1761 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 1762 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 1763 { 1764 Mat B = *fact; 1765 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 1766 IS isicol; 1767 PetscErrorCode ierr; 1768 const PetscInt *r,*ic; 1769 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 1770 PetscInt *bi,*bj,*bdiag; 1771 PetscInt row,nzi,nzi_bl,nzi_bu,*im,dtcount,nzi_al,nzi_au; 1772 PetscInt nlnk,*lnk; 1773 PetscBT lnkbt; 1774 PetscTruth row_identity,icol_identity,both_identity; 1775 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp; 1776 const PetscInt *ics; 1777 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 1778 PetscReal dt=info->dt,shift=info->shiftinblocks; 1779 PetscInt nnz_max; 1780 PetscTruth missing; 1781 1782 PetscFunctionBegin; 1783 /* ------- symbolic factorization, can be reused ---------*/ 1784 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1785 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1786 adiag=a->diag; 1787 1788 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1789 1790 /* bdiag is location of diagonal in factor */ 1791 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1792 1793 /* allocate row pointers bi */ 1794 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1795 1796 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 1797 dtcount = (PetscInt)info->dtcount; 1798 if (dtcount > n/2) dtcount = n/2; 1799 nnz_max = ai[n]+2*n*dtcount+2; 1800 ierr = PetscMalloc(nnz_max*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1801 ierr = PetscMalloc(nnz_max*sizeof(MatScalar),&ba);CHKERRQ(ierr); 1802 1803 /* put together the new matrix */ 1804 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1805 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 1806 b = (Mat_SeqAIJ*)(B)->data; 1807 b->free_a = PETSC_TRUE; 1808 b->free_ij = PETSC_TRUE; 1809 b->singlemalloc = PETSC_FALSE; 1810 b->a = ba; 1811 b->j = bj; 1812 b->i = bi; 1813 b->diag = bdiag; 1814 b->ilen = 0; 1815 b->imax = 0; 1816 b->row = isrow; 1817 b->col = iscol; 1818 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1819 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1820 b->icol = isicol; 1821 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1822 1823 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 1824 b->maxnz = nnz_max; 1825 1826 (B)->factor = MAT_FACTOR_ILUDT; 1827 (B)->info.factor_mallocs = 0; 1828 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 1829 CHKMEMQ; 1830 /* ------- end of symbolic factorization ---------*/ 1831 1832 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1833 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1834 ics = ic; 1835 1836 /* linked list for storing column indices of the active row */ 1837 nlnk = n + 1; 1838 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1839 1840 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 1841 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 1842 jtmp = im + n; 1843 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 1844 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 1845 vtmp = rtmp + n; 1846 1847 bi[0] = 0; 1848 bdiag[0] = nnz_max-1; /* location of diagonal in factor B */ 1849 for (i=0; i<n; i++) { 1850 /* copy initial fill into linked list */ 1851 nzi = 0; /* nonzeros for active row i */ 1852 nzi = ai[r[i]+1] - ai[r[i]]; 1853 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1854 nzi_al = adiag[r[i]] - ai[r[i]]; 1855 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 1856 ajtmp = aj + ai[r[i]]; 1857 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 1858 1859 /* load in initial (unfactored row) */ 1860 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 1861 aatmp = a->a + ai[r[i]]; 1862 for (j=0; j<nzi; j++) { 1863 rtmp[ics[*ajtmp++]] = *aatmp++; 1864 } 1865 1866 /* add pivot rows into linked list */ 1867 row = lnk[n]; 1868 while (row < i) { 1869 nzi_bl = bi[row+1] - bi[row] + 1; 1870 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 1871 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 1872 nzi += nlnk; 1873 row = lnk[row]; 1874 } 1875 1876 /* copy data from lnk into jtmp, then initialize lnk */ 1877 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 1878 1879 /* numerical factorization */ 1880 bjtmp = jtmp; 1881 row = *bjtmp++; /* 1st pivot row */ 1882 while (row < i) { 1883 pc = rtmp + row; 1884 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 1885 multiplier = (*pc) * (*pv); 1886 *pc = multiplier; 1887 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 1888 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 1889 pv = ba + bdiag[row+1] + 1; 1890 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 1891 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 1892 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 1893 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 1894 } 1895 row = *bjtmp++; 1896 } 1897 1898 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 1899 nzi_bl = 0; j = 0; 1900 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 1901 vtmp[j] = rtmp[jtmp[j]]; 1902 nzi_bl++; j++; 1903 } 1904 nzi_bu = nzi - nzi_bl -1; 1905 while (j < nzi){ 1906 vtmp[j] = rtmp[jtmp[j]]; 1907 j++; 1908 } 1909 1910 bjtmp = bj + bi[i]; 1911 batmp = ba + bi[i]; 1912 /* apply level dropping rule to L part */ 1913 ncut = nzi_al + dtcount; 1914 if (ncut < nzi_bl){ 1915 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 1916 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 1917 } else { 1918 ncut = nzi_bl; 1919 } 1920 for (j=0; j<ncut; j++){ 1921 bjtmp[j] = jtmp[j]; 1922 batmp[j] = vtmp[j]; 1923 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 1924 } 1925 bi[i+1] = bi[i] + ncut; 1926 nzi = ncut + 1; 1927 1928 /* apply level dropping rule to U part */ 1929 ncut = nzi_au + dtcount; 1930 if (ncut < nzi_bu){ 1931 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 1932 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 1933 } else { 1934 ncut = nzi_bu; 1935 } 1936 nzi += ncut; 1937 1938 /* mark bdiagonal */ 1939 bdiag[i+1] = bdiag[i] - (ncut + 1); 1940 bjtmp = bj + bdiag[i]; 1941 batmp = ba + bdiag[i]; 1942 *bjtmp = i; 1943 *batmp = rtmp[i]; 1944 if (*batmp == 0.0) *batmp = dt+shift; 1945 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 1946 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 1947 1948 bjtmp = bj + bdiag[i+1]+1; 1949 batmp = ba + bdiag[i+1]+1; 1950 for (k=0; k<ncut; k++){ 1951 bjtmp[k] = jtmp[nzi_bl+1+k]; 1952 batmp[k] = vtmp[nzi_bl+1+k]; 1953 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 1954 } 1955 /* printf("\n"); */ 1956 1957 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 1958 /* 1959 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 1960 printf(" ----------------------------\n"); 1961 */ 1962 } /* for (i=0; i<n; i++) */ 1963 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 1964 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 1965 1966 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1967 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1968 1969 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1970 ierr = PetscFree(im);CHKERRQ(ierr); 1971 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1972 1973 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 1974 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 1975 1976 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1977 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 1978 both_identity = (PetscTruth) (row_identity && icol_identity); 1979 if (row_identity && icol_identity) { 1980 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt; 1981 } else { 1982 B->ops->solve = MatSolve_SeqAIJ_iludt; 1983 } 1984 1985 B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 1986 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 1987 B->ops->solveadd = 0; 1988 B->ops->solvetranspose = 0; 1989 B->ops->solvetransposeadd = 0; 1990 B->ops->matsolve = 0; 1991 B->assembled = PETSC_TRUE; 1992 B->preallocated = PETSC_TRUE; 1993 PetscFunctionReturn(0); 1994 } 1995 1996 /* a wraper of MatILUDTFactor_SeqAIJ() */ 1997 #undef __FUNCT__ 1998 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 1999 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 2000 { 2001 PetscErrorCode ierr; 2002 2003 PetscFunctionBegin; 2004 /* printf("MatILUDTFactorSymbolic_SeqAIJ is called...\n"); */ 2005 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 2006 2007 fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 2008 PetscFunctionReturn(0); 2009 } 2010 2011 /* 2012 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 2013 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 2014 */ 2015 #undef __FUNCT__ 2016 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 2017 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 2018 { 2019 Mat C=fact; 2020 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 2021 IS isrow = b->row,isicol = b->icol; 2022 PetscErrorCode ierr; 2023 const PetscInt *r,*ic,*ics; 2024 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 2025 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 2026 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 2027 PetscReal dt=info->dt,shift=info->shiftinblocks; 2028 PetscTruth row_identity, col_identity; 2029 2030 PetscFunctionBegin; 2031 /* printf("MatILUDTFactorNumeric_SeqAIJ is called...\n"); */ 2032 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2033 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2034 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2035 ics = ic; 2036 2037 for (i=0; i<n; i++){ 2038 /* initialize rtmp array */ 2039 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 2040 bjtmp = bj + bi[i]; 2041 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 2042 rtmp[i] = 0.0; 2043 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 2044 bjtmp = bj + bdiag[i+1] + 1; 2045 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 2046 2047 /* load in initial unfactored row of A */ 2048 /* printf("row %d\n",i); */ 2049 nz = ai[r[i]+1] - ai[r[i]]; 2050 ajtmp = aj + ai[r[i]]; 2051 v = aa + ai[r[i]]; 2052 for (j=0; j<nz; j++) { 2053 rtmp[ics[*ajtmp++]] = v[j]; 2054 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 2055 } 2056 /* printf("\n"); */ 2057 2058 /* numerical factorization */ 2059 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 2060 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 2061 k = 0; 2062 while (k < nzl){ 2063 row = *bjtmp++; 2064 /* printf(" prow %d\n",row); */ 2065 pc = rtmp + row; 2066 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 2067 multiplier = (*pc) * (*pv); 2068 *pc = multiplier; 2069 if (PetscAbsScalar(multiplier) > dt){ 2070 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2071 pv = b->a + bdiag[row+1] + 1; 2072 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2073 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2074 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 2075 } 2076 k++; 2077 } 2078 2079 /* finished row so stick it into b->a */ 2080 /* L-part */ 2081 pv = b->a + bi[i] ; 2082 pj = bj + bi[i] ; 2083 nzl = bi[i+1] - bi[i]; 2084 for (j=0; j<nzl; j++) { 2085 pv[j] = rtmp[pj[j]]; 2086 /* printf(" (%d,%g),",pj[j],pv[j]); */ 2087 } 2088 2089 /* diagonal: invert diagonal entries for simplier triangular solves */ 2090 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 2091 b->a[bdiag[i]] = 1.0/rtmp[i]; 2092 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 2093 2094 /* U-part */ 2095 pv = b->a + bdiag[i+1] + 1; 2096 pj = bj + bdiag[i+1] + 1; 2097 nzu = bdiag[i] - bdiag[i+1] - 1; 2098 for (j=0; j<nzu; j++) { 2099 pv[j] = rtmp[pj[j]]; 2100 /* printf(" (%d,%g),",pj[j],pv[j]); */ 2101 } 2102 /* printf("\n"); */ 2103 } 2104 2105 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2106 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2107 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2108 2109 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2110 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 2111 if (row_identity && col_identity) { 2112 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_iludt; 2113 } else { 2114 C->ops->solve = MatSolve_SeqAIJ_iludt; 2115 } 2116 C->ops->solveadd = 0; 2117 C->ops->solvetranspose = 0; 2118 C->ops->solvetransposeadd = 0; 2119 C->ops->matsolve = 0; 2120 C->assembled = PETSC_TRUE; 2121 C->preallocated = PETSC_TRUE; 2122 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125