1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 #undef __FUNCT__ 8 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 9 /* 10 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 11 12 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 13 */ 14 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscBool *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0.0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 31 else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc1(n,&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc1(n,&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 96 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 97 { 98 PetscFunctionBegin; 99 *flg = PETSC_TRUE; 100 PetscFunctionReturn(0); 101 } 102 103 #undef __FUNCT__ 104 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 105 PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 106 { 107 PetscInt n = A->rmap->n; 108 PetscErrorCode ierr; 109 110 PetscFunctionBegin; 111 #if defined(PETSC_USE_COMPLEX) 112 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 113 #endif 114 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 115 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 116 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 117 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 118 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 122 ierr = MatSetBlockSizesFromMats(*B,A,A);CHKERRQ(ierr); 123 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 124 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 125 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 126 127 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 128 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 129 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 130 (*B)->factortype = ftype; 131 PetscFunctionReturn(0); 132 } 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 PetscErrorCode ierr; 141 const PetscInt *r,*ic; 142 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 143 PetscInt *bi,*bj,*ajtmp; 144 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 145 PetscReal f; 146 PetscInt nlnk,*lnk,k,**bi_ptr; 147 PetscFreeSpaceList free_space=NULL,current_space=NULL; 148 PetscBT lnkbt; 149 150 PetscFunctionBegin; 151 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 153 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 154 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 155 156 /* get new row pointers */ 157 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 158 bi[0] = 0; 159 160 /* bdiag is location of diagonal in factor */ 161 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 162 bdiag[0] = 0; 163 164 /* linked list for storing column indices of the active row */ 165 nlnk = n + 1; 166 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 167 168 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 169 170 /* initial FreeSpace size is f*(ai[n]+1) */ 171 f = info->fill; 172 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 173 current_space = free_space; 174 175 for (i=0; i<n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i]+1] - ai[r[i]]; 179 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 180 ajtmp = aj + ai[r[i]]; 181 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 182 nzi += nlnk; 183 184 /* add pivot rows into linked list */ 185 row = lnk[n]; 186 while (row < i) { 187 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 188 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 189 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 190 nzi += nlnk; 191 row = lnk[row]; 192 } 193 bi[i+1] = bi[i] + nzi; 194 im[i] = nzi; 195 196 /* mark bdiag */ 197 nzbd = 0; 198 nnz = nzi; 199 k = lnk[n]; 200 while (nnz-- && k < i) { 201 nzbd++; 202 k = lnk[k]; 203 } 204 bdiag[i] = bi[i] + nzbd; 205 206 /* if free space is not available, make more free space */ 207 if (current_space->local_remaining<nzi) { 208 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 209 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 210 reallocs++; 211 } 212 213 /* copy data into free space, then initialize lnk */ 214 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 215 216 bi_ptr[i] = current_space->array; 217 current_space->array += nzi; 218 current_space->local_used += nzi; 219 current_space->local_remaining -= nzi; 220 } 221 #if defined(PETSC_USE_INFO) 222 if (ai[n] != 0) { 223 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 224 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 225 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 226 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 227 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 228 } else { 229 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 230 } 231 #endif 232 233 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 234 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 235 236 /* destroy list of free space and other temporary array(s) */ 237 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 238 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 239 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 240 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 241 242 /* put together the new matrix */ 243 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 244 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 245 b = (Mat_SeqAIJ*)(B)->data; 246 247 b->free_a = PETSC_TRUE; 248 b->free_ij = PETSC_TRUE; 249 b->singlemalloc = PETSC_FALSE; 250 251 ierr = PetscMalloc1((bi[n]+1),&b->a);CHKERRQ(ierr); 252 b->j = bj; 253 b->i = bi; 254 b->diag = bdiag; 255 b->ilen = 0; 256 b->imax = 0; 257 b->row = isrow; 258 b->col = iscol; 259 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 261 b->icol = isicol; 262 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 263 264 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265 ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 266 b->maxnz = b->nz = bi[n]; 267 268 (B)->factortype = MAT_FACTOR_LU; 269 (B)->info.factor_mallocs = reallocs; 270 (B)->info.fill_ratio_given = f; 271 272 if (ai[n]) { 273 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274 } else { 275 (B)->info.fill_ratio_needed = 0.0; 276 } 277 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278 if (a->inode.size) { 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287 { 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289 IS isicol; 290 PetscErrorCode ierr; 291 const PetscInt *r,*ic,*ai=a->i,*aj=a->j,*ajtmp; 292 PetscInt i,n=A->rmap->n; 293 PetscInt *bi,*bj; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=NULL,current_space=NULL; 298 PetscBT lnkbt; 299 300 PetscFunctionBegin; 301 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302 /* 303 PetscBool olddatastruct=PETSC_FALSE; 304 ierr = PetscOptionsGetBool(NULL,"-lu_old",&olddatastruct,NULL);CHKERRQ(ierr); 305 if (olddatastruct) { 306 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 */ 310 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 317 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 324 ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 336 ajtmp = aj + ai[r[i]]; 337 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i+1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining<nzi) { 364 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 372 bi_ptr[i] = current_space->array; 373 current_space->array += nzi; 374 current_space->local_used += nzi; 375 current_space->local_remaining -= nzi; 376 } 377 378 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 379 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 380 381 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 382 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 383 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 384 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 385 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 386 387 /* put together the new matrix */ 388 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 389 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 390 b = (Mat_SeqAIJ*)(B)->data; 391 392 b->free_a = PETSC_TRUE; 393 b->free_ij = PETSC_TRUE; 394 b->singlemalloc = PETSC_FALSE; 395 396 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 397 398 b->j = bj; 399 b->i = bi; 400 b->diag = bdiag; 401 b->ilen = 0; 402 b->imax = 0; 403 b->row = isrow; 404 b->col = iscol; 405 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 406 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 407 b->icol = isicol; 408 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 409 410 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 411 ierr = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 412 b->maxnz = b->nz = bdiag[0]+1; 413 414 B->factortype = MAT_FACTOR_LU; 415 B->info.factor_mallocs = reallocs; 416 B->info.fill_ratio_given = f; 417 418 if (ai[n]) { 419 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 420 } else { 421 B->info.fill_ratio_needed = 0.0; 422 } 423 #if defined(PETSC_USE_INFO) 424 if (ai[n] != 0) { 425 PetscReal af = B->info.fill_ratio_needed; 426 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 427 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 428 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr); 429 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 430 } else { 431 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 432 } 433 #endif 434 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 435 if (a->inode.size) { 436 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 437 } 438 ierr = Mat_CheckInode_FactorLU(B);CHKERRQ(ierr); 439 PetscFunctionReturn(0); 440 } 441 442 /* 443 Trouble in factorization, should we dump the original matrix? 444 */ 445 #undef __FUNCT__ 446 #define __FUNCT__ "MatFactorDumpMatrix" 447 PetscErrorCode MatFactorDumpMatrix(Mat A) 448 { 449 PetscErrorCode ierr; 450 PetscBool flg = PETSC_FALSE; 451 452 PetscFunctionBegin; 453 ierr = PetscOptionsGetBool(NULL,"-mat_factor_dump_on_error",&flg,NULL);CHKERRQ(ierr); 454 if (flg) { 455 PetscViewer viewer; 456 char filename[PETSC_MAX_PATH_LEN]; 457 458 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 459 ierr = PetscViewerBinaryOpen(PetscObjectComm((PetscObject)A),filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 460 ierr = MatView(A,viewer);CHKERRQ(ierr); 461 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 462 } 463 PetscFunctionReturn(0); 464 } 465 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 468 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 469 { 470 Mat C =B; 471 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 472 IS isrow = b->row,isicol = b->icol; 473 PetscErrorCode ierr; 474 const PetscInt *r,*ic,*ics; 475 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 476 PetscInt i,j,k,nz,nzL,row,*pj; 477 const PetscInt *ajtmp,*bjtmp; 478 MatScalar *rtmp,*pc,multiplier,*pv; 479 const MatScalar *aa=a->a,*v; 480 PetscBool row_identity,col_identity; 481 FactorShiftCtx sctx; 482 const PetscInt *ddiag; 483 PetscReal rs; 484 MatScalar d; 485 486 PetscFunctionBegin; 487 /* MatPivotSetUp(): initialize shift context sctx */ 488 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 489 490 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 491 ddiag = a->diag; 492 sctx.shift_top = info->zeropivot; 493 for (i=0; i<n; i++) { 494 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 495 d = (aa)[ddiag[i]]; 496 rs = -PetscAbsScalar(d) - PetscRealPart(d); 497 v = aa+ai[i]; 498 nz = ai[i+1] - ai[i]; 499 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 500 if (rs>sctx.shift_top) sctx.shift_top = rs; 501 } 502 sctx.shift_top *= 1.1; 503 sctx.nshift_max = 5; 504 sctx.shift_lo = 0.; 505 sctx.shift_hi = 1.; 506 } 507 508 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 509 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 510 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 511 ics = ic; 512 513 do { 514 sctx.newshift = PETSC_FALSE; 515 for (i=0; i<n; i++) { 516 /* zero rtmp */ 517 /* L part */ 518 nz = bi[i+1] - bi[i]; 519 bjtmp = bj + bi[i]; 520 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 521 522 /* U part */ 523 nz = bdiag[i]-bdiag[i+1]; 524 bjtmp = bj + bdiag[i+1]+1; 525 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 526 527 /* load in initial (unfactored row) */ 528 nz = ai[r[i]+1] - ai[r[i]]; 529 ajtmp = aj + ai[r[i]]; 530 v = aa + ai[r[i]]; 531 for (j=0; j<nz; j++) { 532 rtmp[ics[ajtmp[j]]] = v[j]; 533 } 534 /* ZeropivotApply() */ 535 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 536 537 /* elimination */ 538 bjtmp = bj + bi[i]; 539 row = *bjtmp++; 540 nzL = bi[i+1] - bi[i]; 541 for (k=0; k < nzL; k++) { 542 pc = rtmp + row; 543 if (*pc != 0.0) { 544 pv = b->a + bdiag[row]; 545 multiplier = *pc * (*pv); 546 *pc = multiplier; 547 548 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 549 pv = b->a + bdiag[row+1]+1; 550 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 551 552 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 553 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 554 } 555 row = *bjtmp++; 556 } 557 558 /* finished row so stick it into b->a */ 559 rs = 0.0; 560 /* L part */ 561 pv = b->a + bi[i]; 562 pj = b->j + bi[i]; 563 nz = bi[i+1] - bi[i]; 564 for (j=0; j<nz; j++) { 565 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 566 } 567 568 /* U part */ 569 pv = b->a + bdiag[i+1]+1; 570 pj = b->j + bdiag[i+1]+1; 571 nz = bdiag[i] - bdiag[i+1]-1; 572 for (j=0; j<nz; j++) { 573 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 574 } 575 576 sctx.rs = rs; 577 sctx.pv = rtmp[i]; 578 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 579 if (sctx.newshift) break; /* break for-loop */ 580 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 581 582 /* Mark diagonal and invert diagonal for simplier triangular solves */ 583 pv = b->a + bdiag[i]; 584 *pv = 1.0/rtmp[i]; 585 586 } /* endof for (i=0; i<n; i++) { */ 587 588 /* MatPivotRefine() */ 589 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 590 /* 591 * if no shift in this attempt & shifting & started shifting & can refine, 592 * then try lower shift 593 */ 594 sctx.shift_hi = sctx.shift_fraction; 595 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 596 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 597 sctx.newshift = PETSC_TRUE; 598 sctx.nshift++; 599 } 600 } while (sctx.newshift); 601 602 ierr = PetscFree(rtmp);CHKERRQ(ierr); 603 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 604 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 605 606 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 607 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 608 if (b->inode.size) { 609 C->ops->solve = MatSolve_SeqAIJ_Inode; 610 } else if (row_identity && col_identity) { 611 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 612 } else { 613 C->ops->solve = MatSolve_SeqAIJ; 614 } 615 C->ops->solveadd = MatSolveAdd_SeqAIJ; 616 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 617 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 618 C->ops->matsolve = MatMatSolve_SeqAIJ; 619 C->assembled = PETSC_TRUE; 620 C->preallocated = PETSC_TRUE; 621 622 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 623 624 /* MatShiftView(A,info,&sctx) */ 625 if (sctx.nshift) { 626 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 627 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 628 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 629 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 630 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 631 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 632 } 633 } 634 PetscFunctionReturn(0); 635 } 636 637 #undef __FUNCT__ 638 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 639 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 640 { 641 Mat C =B; 642 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 643 IS isrow = b->row,isicol = b->icol; 644 PetscErrorCode ierr; 645 const PetscInt *r,*ic,*ics; 646 PetscInt nz,row,i,j,n=A->rmap->n,diag; 647 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 648 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 649 MatScalar *pv,*rtmp,*pc,multiplier,d; 650 const MatScalar *v,*aa=a->a; 651 PetscReal rs=0.0; 652 FactorShiftCtx sctx; 653 const PetscInt *ddiag; 654 PetscBool row_identity, col_identity; 655 656 PetscFunctionBegin; 657 /* MatPivotSetUp(): initialize shift context sctx */ 658 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 659 660 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 661 ddiag = a->diag; 662 sctx.shift_top = info->zeropivot; 663 for (i=0; i<n; i++) { 664 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 665 d = (aa)[ddiag[i]]; 666 rs = -PetscAbsScalar(d) - PetscRealPart(d); 667 v = aa+ai[i]; 668 nz = ai[i+1] - ai[i]; 669 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 670 if (rs>sctx.shift_top) sctx.shift_top = rs; 671 } 672 sctx.shift_top *= 1.1; 673 sctx.nshift_max = 5; 674 sctx.shift_lo = 0.; 675 sctx.shift_hi = 1.; 676 } 677 678 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 679 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 680 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 681 ics = ic; 682 683 do { 684 sctx.newshift = PETSC_FALSE; 685 for (i=0; i<n; i++) { 686 nz = bi[i+1] - bi[i]; 687 bjtmp = bj + bi[i]; 688 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 689 690 /* load in initial (unfactored row) */ 691 nz = ai[r[i]+1] - ai[r[i]]; 692 ajtmp = aj + ai[r[i]]; 693 v = aa + ai[r[i]]; 694 for (j=0; j<nz; j++) { 695 rtmp[ics[ajtmp[j]]] = v[j]; 696 } 697 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 698 699 row = *bjtmp++; 700 while (row < i) { 701 pc = rtmp + row; 702 if (*pc != 0.0) { 703 pv = b->a + diag_offset[row]; 704 pj = b->j + diag_offset[row] + 1; 705 multiplier = *pc / *pv++; 706 *pc = multiplier; 707 nz = bi[row+1] - diag_offset[row] - 1; 708 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 709 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 710 } 711 row = *bjtmp++; 712 } 713 /* finished row so stick it into b->a */ 714 pv = b->a + bi[i]; 715 pj = b->j + bi[i]; 716 nz = bi[i+1] - bi[i]; 717 diag = diag_offset[i] - bi[i]; 718 rs = 0.0; 719 for (j=0; j<nz; j++) { 720 pv[j] = rtmp[pj[j]]; 721 rs += PetscAbsScalar(pv[j]); 722 } 723 rs -= PetscAbsScalar(pv[diag]); 724 725 sctx.rs = rs; 726 sctx.pv = pv[diag]; 727 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 728 if (sctx.newshift) break; 729 pv[diag] = sctx.pv; 730 } 731 732 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 733 /* 734 * if no shift in this attempt & shifting & started shifting & can refine, 735 * then try lower shift 736 */ 737 sctx.shift_hi = sctx.shift_fraction; 738 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 739 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 740 sctx.newshift = PETSC_TRUE; 741 sctx.nshift++; 742 } 743 } while (sctx.newshift); 744 745 /* invert diagonal entries for simplier triangular solves */ 746 for (i=0; i<n; i++) { 747 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 748 } 749 ierr = PetscFree(rtmp);CHKERRQ(ierr); 750 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 751 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 752 753 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 754 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 755 if (row_identity && col_identity) { 756 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 757 } else { 758 C->ops->solve = MatSolve_SeqAIJ_inplace; 759 } 760 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 761 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 762 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 763 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 764 765 C->assembled = PETSC_TRUE; 766 C->preallocated = PETSC_TRUE; 767 768 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 769 if (sctx.nshift) { 770 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 771 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 772 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 773 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 774 } 775 } 776 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 777 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 778 779 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 /* 784 This routine implements inplace ILU(0) with row or/and column permutations. 785 Input: 786 A - original matrix 787 Output; 788 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 789 a->j (col index) is permuted by the inverse of colperm, then sorted 790 a->a reordered accordingly with a->j 791 a->diag (ptr to diagonal elements) is updated. 792 */ 793 #undef __FUNCT__ 794 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 795 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 796 { 797 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 798 IS isrow = a->row,isicol = a->icol; 799 PetscErrorCode ierr; 800 const PetscInt *r,*ic,*ics; 801 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 802 PetscInt *ajtmp,nz,row; 803 PetscInt *diag = a->diag,nbdiag,*pj; 804 PetscScalar *rtmp,*pc,multiplier,d; 805 MatScalar *pv,*v; 806 PetscReal rs; 807 FactorShiftCtx sctx; 808 const MatScalar *aa=a->a,*vtmp; 809 810 PetscFunctionBegin; 811 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 812 813 /* MatPivotSetUp(): initialize shift context sctx */ 814 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 815 816 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 817 const PetscInt *ddiag = a->diag; 818 sctx.shift_top = info->zeropivot; 819 for (i=0; i<n; i++) { 820 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 821 d = (aa)[ddiag[i]]; 822 rs = -PetscAbsScalar(d) - PetscRealPart(d); 823 vtmp = aa+ai[i]; 824 nz = ai[i+1] - ai[i]; 825 for (j=0; j<nz; j++) rs += PetscAbsScalar(vtmp[j]); 826 if (rs>sctx.shift_top) sctx.shift_top = rs; 827 } 828 sctx.shift_top *= 1.1; 829 sctx.nshift_max = 5; 830 sctx.shift_lo = 0.; 831 sctx.shift_hi = 1.; 832 } 833 834 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 835 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 836 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 837 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 838 ics = ic; 839 840 #if defined(MV) 841 sctx.shift_top = 0.; 842 sctx.nshift_max = 0; 843 sctx.shift_lo = 0.; 844 sctx.shift_hi = 0.; 845 sctx.shift_fraction = 0.; 846 847 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 848 sctx.shift_top = 0.; 849 for (i=0; i<n; i++) { 850 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 851 d = (a->a)[diag[i]]; 852 rs = -PetscAbsScalar(d) - PetscRealPart(d); 853 v = a->a+ai[i]; 854 nz = ai[i+1] - ai[i]; 855 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 856 if (rs>sctx.shift_top) sctx.shift_top = rs; 857 } 858 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 859 sctx.shift_top *= 1.1; 860 sctx.nshift_max = 5; 861 sctx.shift_lo = 0.; 862 sctx.shift_hi = 1.; 863 } 864 865 sctx.shift_amount = 0.; 866 sctx.nshift = 0; 867 #endif 868 869 do { 870 sctx.newshift = PETSC_FALSE; 871 for (i=0; i<n; i++) { 872 /* load in initial unfactored row */ 873 nz = ai[r[i]+1] - ai[r[i]]; 874 ajtmp = aj + ai[r[i]]; 875 v = a->a + ai[r[i]]; 876 /* sort permuted ajtmp and values v accordingly */ 877 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 878 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 879 880 diag[r[i]] = ai[r[i]]; 881 for (j=0; j<nz; j++) { 882 rtmp[ajtmp[j]] = v[j]; 883 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 884 } 885 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 886 887 row = *ajtmp++; 888 while (row < i) { 889 pc = rtmp + row; 890 if (*pc != 0.0) { 891 pv = a->a + diag[r[row]]; 892 pj = aj + diag[r[row]] + 1; 893 894 multiplier = *pc / *pv++; 895 *pc = multiplier; 896 nz = ai[r[row]+1] - diag[r[row]] - 1; 897 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 898 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 899 } 900 row = *ajtmp++; 901 } 902 /* finished row so overwrite it onto a->a */ 903 pv = a->a + ai[r[i]]; 904 pj = aj + ai[r[i]]; 905 nz = ai[r[i]+1] - ai[r[i]]; 906 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 907 908 rs = 0.0; 909 for (j=0; j<nz; j++) { 910 pv[j] = rtmp[pj[j]]; 911 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 912 } 913 914 sctx.rs = rs; 915 sctx.pv = pv[nbdiag]; 916 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 917 if (sctx.newshift) break; 918 pv[nbdiag] = sctx.pv; 919 } 920 921 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 922 /* 923 * if no shift in this attempt & shifting & started shifting & can refine, 924 * then try lower shift 925 */ 926 sctx.shift_hi = sctx.shift_fraction; 927 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 928 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 929 sctx.newshift = PETSC_TRUE; 930 sctx.nshift++; 931 } 932 } while (sctx.newshift); 933 934 /* invert diagonal entries for simplier triangular solves */ 935 for (i=0; i<n; i++) { 936 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 937 } 938 939 ierr = PetscFree(rtmp);CHKERRQ(ierr); 940 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 941 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 942 943 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 944 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 945 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 946 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 947 948 A->assembled = PETSC_TRUE; 949 A->preallocated = PETSC_TRUE; 950 951 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 952 if (sctx.nshift) { 953 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 954 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 955 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 956 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 957 } 958 } 959 PetscFunctionReturn(0); 960 } 961 962 /* ----------------------------------------------------------- */ 963 #undef __FUNCT__ 964 #define __FUNCT__ "MatLUFactor_SeqAIJ" 965 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 966 { 967 PetscErrorCode ierr; 968 Mat C; 969 970 PetscFunctionBegin; 971 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 972 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 973 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 974 975 A->ops->solve = C->ops->solve; 976 A->ops->solvetranspose = C->ops->solvetranspose; 977 978 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 979 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 980 PetscFunctionReturn(0); 981 } 982 /* ----------------------------------------------------------- */ 983 984 985 #undef __FUNCT__ 986 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 987 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 988 { 989 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 990 IS iscol = a->col,isrow = a->row; 991 PetscErrorCode ierr; 992 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 993 PetscInt nz; 994 const PetscInt *rout,*cout,*r,*c; 995 PetscScalar *x,*tmp,*tmps,sum; 996 const PetscScalar *b; 997 const MatScalar *aa = a->a,*v; 998 999 PetscFunctionBegin; 1000 if (!n) PetscFunctionReturn(0); 1001 1002 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 tmp = a->solve_work; 1005 1006 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1007 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1008 1009 /* forward solve the lower triangular */ 1010 tmp[0] = b[*r++]; 1011 tmps = tmp; 1012 for (i=1; i<n; i++) { 1013 v = aa + ai[i]; 1014 vi = aj + ai[i]; 1015 nz = a->diag[i] - ai[i]; 1016 sum = b[*r++]; 1017 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1018 tmp[i] = sum; 1019 } 1020 1021 /* backward solve the upper triangular */ 1022 for (i=n-1; i>=0; i--) { 1023 v = aa + a->diag[i] + 1; 1024 vi = aj + a->diag[i] + 1; 1025 nz = ai[i+1] - a->diag[i] - 1; 1026 sum = tmp[i]; 1027 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1028 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1029 } 1030 1031 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1032 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1033 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1035 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1041 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1042 { 1043 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1044 IS iscol = a->col,isrow = a->row; 1045 PetscErrorCode ierr; 1046 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1047 PetscInt nz,neq; 1048 const PetscInt *rout,*cout,*r,*c; 1049 PetscScalar *x,*b,*tmp,*tmps,sum; 1050 const MatScalar *aa = a->a,*v; 1051 PetscBool bisdense,xisdense; 1052 1053 PetscFunctionBegin; 1054 if (!n) PetscFunctionReturn(0); 1055 1056 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1057 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1058 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1059 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1060 1061 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1062 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1063 1064 tmp = a->solve_work; 1065 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1066 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1067 1068 for (neq=0; neq<B->cmap->n; neq++) { 1069 /* forward solve the lower triangular */ 1070 tmp[0] = b[r[0]]; 1071 tmps = tmp; 1072 for (i=1; i<n; i++) { 1073 v = aa + ai[i]; 1074 vi = aj + ai[i]; 1075 nz = a->diag[i] - ai[i]; 1076 sum = b[r[i]]; 1077 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1078 tmp[i] = sum; 1079 } 1080 /* backward solve the upper triangular */ 1081 for (i=n-1; i>=0; i--) { 1082 v = aa + a->diag[i] + 1; 1083 vi = aj + a->diag[i] + 1; 1084 nz = ai[i+1] - a->diag[i] - 1; 1085 sum = tmp[i]; 1086 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1087 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1088 } 1089 1090 b += n; 1091 x += n; 1092 } 1093 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1094 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1095 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1096 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1097 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1098 PetscFunctionReturn(0); 1099 } 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1103 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1104 { 1105 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1106 IS iscol = a->col,isrow = a->row; 1107 PetscErrorCode ierr; 1108 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1109 PetscInt nz,neq; 1110 const PetscInt *rout,*cout,*r,*c; 1111 PetscScalar *x,*b,*tmp,sum; 1112 const MatScalar *aa = a->a,*v; 1113 PetscBool bisdense,xisdense; 1114 1115 PetscFunctionBegin; 1116 if (!n) PetscFunctionReturn(0); 1117 1118 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1119 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1120 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1121 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1122 1123 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1124 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1125 1126 tmp = a->solve_work; 1127 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1128 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1129 1130 for (neq=0; neq<B->cmap->n; neq++) { 1131 /* forward solve the lower triangular */ 1132 tmp[0] = b[r[0]]; 1133 v = aa; 1134 vi = aj; 1135 for (i=1; i<n; i++) { 1136 nz = ai[i+1] - ai[i]; 1137 sum = b[r[i]]; 1138 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1139 tmp[i] = sum; 1140 v += nz; vi += nz; 1141 } 1142 1143 /* backward solve the upper triangular */ 1144 for (i=n-1; i>=0; i--) { 1145 v = aa + adiag[i+1]+1; 1146 vi = aj + adiag[i+1]+1; 1147 nz = adiag[i]-adiag[i+1]-1; 1148 sum = tmp[i]; 1149 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1150 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1151 } 1152 1153 b += n; 1154 x += n; 1155 } 1156 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1157 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1158 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1159 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1160 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 #undef __FUNCT__ 1165 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1166 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1167 { 1168 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1169 IS iscol = a->col,isrow = a->row; 1170 PetscErrorCode ierr; 1171 const PetscInt *r,*c,*rout,*cout; 1172 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1173 PetscInt nz,row; 1174 PetscScalar *x,*b,*tmp,*tmps,sum; 1175 const MatScalar *aa = a->a,*v; 1176 1177 PetscFunctionBegin; 1178 if (!n) PetscFunctionReturn(0); 1179 1180 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1181 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1182 tmp = a->solve_work; 1183 1184 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1185 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1186 1187 /* forward solve the lower triangular */ 1188 tmp[0] = b[*r++]; 1189 tmps = tmp; 1190 for (row=1; row<n; row++) { 1191 i = rout[row]; /* permuted row */ 1192 v = aa + ai[i]; 1193 vi = aj + ai[i]; 1194 nz = a->diag[i] - ai[i]; 1195 sum = b[*r++]; 1196 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1197 tmp[row] = sum; 1198 } 1199 1200 /* backward solve the upper triangular */ 1201 for (row=n-1; row>=0; row--) { 1202 i = rout[row]; /* permuted row */ 1203 v = aa + a->diag[i] + 1; 1204 vi = aj + a->diag[i] + 1; 1205 nz = ai[i+1] - a->diag[i] - 1; 1206 sum = tmp[row]; 1207 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1208 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1209 } 1210 1211 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1212 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1213 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1215 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1216 PetscFunctionReturn(0); 1217 } 1218 1219 /* ----------------------------------------------------------- */ 1220 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1221 #undef __FUNCT__ 1222 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1223 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1224 { 1225 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1226 PetscErrorCode ierr; 1227 PetscInt n = A->rmap->n; 1228 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1229 PetscScalar *x; 1230 const PetscScalar *b; 1231 const MatScalar *aa = a->a; 1232 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1233 PetscInt adiag_i,i,nz,ai_i; 1234 const PetscInt *vi; 1235 const MatScalar *v; 1236 PetscScalar sum; 1237 #endif 1238 1239 PetscFunctionBegin; 1240 if (!n) PetscFunctionReturn(0); 1241 1242 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1243 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1244 1245 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1246 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1247 #else 1248 /* forward solve the lower triangular */ 1249 x[0] = b[0]; 1250 for (i=1; i<n; i++) { 1251 ai_i = ai[i]; 1252 v = aa + ai_i; 1253 vi = aj + ai_i; 1254 nz = adiag[i] - ai_i; 1255 sum = b[i]; 1256 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1257 x[i] = sum; 1258 } 1259 1260 /* backward solve the upper triangular */ 1261 for (i=n-1; i>=0; i--) { 1262 adiag_i = adiag[i]; 1263 v = aa + adiag_i + 1; 1264 vi = aj + adiag_i + 1; 1265 nz = ai[i+1] - adiag_i - 1; 1266 sum = x[i]; 1267 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1268 x[i] = sum*aa[adiag_i]; 1269 } 1270 #endif 1271 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1272 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1273 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1274 PetscFunctionReturn(0); 1275 } 1276 1277 #undef __FUNCT__ 1278 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1279 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1280 { 1281 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1282 IS iscol = a->col,isrow = a->row; 1283 PetscErrorCode ierr; 1284 PetscInt i, n = A->rmap->n,j; 1285 PetscInt nz; 1286 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1287 PetscScalar *x,*tmp,sum; 1288 const PetscScalar *b; 1289 const MatScalar *aa = a->a,*v; 1290 1291 PetscFunctionBegin; 1292 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1293 1294 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1295 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1296 tmp = a->solve_work; 1297 1298 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1299 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1300 1301 /* forward solve the lower triangular */ 1302 tmp[0] = b[*r++]; 1303 for (i=1; i<n; i++) { 1304 v = aa + ai[i]; 1305 vi = aj + ai[i]; 1306 nz = a->diag[i] - ai[i]; 1307 sum = b[*r++]; 1308 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1309 tmp[i] = sum; 1310 } 1311 1312 /* backward solve the upper triangular */ 1313 for (i=n-1; i>=0; i--) { 1314 v = aa + a->diag[i] + 1; 1315 vi = aj + a->diag[i] + 1; 1316 nz = ai[i+1] - a->diag[i] - 1; 1317 sum = tmp[i]; 1318 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1319 tmp[i] = sum*aa[a->diag[i]]; 1320 x[*c--] += tmp[i]; 1321 } 1322 1323 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1324 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1325 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1326 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1327 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1328 PetscFunctionReturn(0); 1329 } 1330 1331 #undef __FUNCT__ 1332 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1333 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1334 { 1335 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1336 IS iscol = a->col,isrow = a->row; 1337 PetscErrorCode ierr; 1338 PetscInt i, n = A->rmap->n,j; 1339 PetscInt nz; 1340 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1341 PetscScalar *x,*tmp,sum; 1342 const PetscScalar *b; 1343 const MatScalar *aa = a->a,*v; 1344 1345 PetscFunctionBegin; 1346 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1347 1348 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1349 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1350 tmp = a->solve_work; 1351 1352 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1353 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1354 1355 /* forward solve the lower triangular */ 1356 tmp[0] = b[r[0]]; 1357 v = aa; 1358 vi = aj; 1359 for (i=1; i<n; i++) { 1360 nz = ai[i+1] - ai[i]; 1361 sum = b[r[i]]; 1362 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1363 tmp[i] = sum; 1364 v += nz; 1365 vi += nz; 1366 } 1367 1368 /* backward solve the upper triangular */ 1369 v = aa + adiag[n-1]; 1370 vi = aj + adiag[n-1]; 1371 for (i=n-1; i>=0; i--) { 1372 nz = adiag[i] - adiag[i+1] - 1; 1373 sum = tmp[i]; 1374 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1375 tmp[i] = sum*v[nz]; 1376 x[c[i]] += tmp[i]; 1377 v += nz+1; vi += nz+1; 1378 } 1379 1380 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1381 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1382 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1383 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1384 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1385 PetscFunctionReturn(0); 1386 } 1387 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1390 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1391 { 1392 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1393 IS iscol = a->col,isrow = a->row; 1394 PetscErrorCode ierr; 1395 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1396 PetscInt i,n = A->rmap->n,j; 1397 PetscInt nz; 1398 PetscScalar *x,*tmp,s1; 1399 const MatScalar *aa = a->a,*v; 1400 const PetscScalar *b; 1401 1402 PetscFunctionBegin; 1403 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1404 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1405 tmp = a->solve_work; 1406 1407 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1408 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1409 1410 /* copy the b into temp work space according to permutation */ 1411 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1412 1413 /* forward solve the U^T */ 1414 for (i=0; i<n; i++) { 1415 v = aa + diag[i]; 1416 vi = aj + diag[i] + 1; 1417 nz = ai[i+1] - diag[i] - 1; 1418 s1 = tmp[i]; 1419 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1420 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1421 tmp[i] = s1; 1422 } 1423 1424 /* backward solve the L^T */ 1425 for (i=n-1; i>=0; i--) { 1426 v = aa + diag[i] - 1; 1427 vi = aj + diag[i] - 1; 1428 nz = diag[i] - ai[i]; 1429 s1 = tmp[i]; 1430 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1431 } 1432 1433 /* copy tmp into x according to permutation */ 1434 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1435 1436 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1437 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1438 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1439 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1440 1441 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1442 PetscFunctionReturn(0); 1443 } 1444 1445 #undef __FUNCT__ 1446 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1447 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1448 { 1449 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1450 IS iscol = a->col,isrow = a->row; 1451 PetscErrorCode ierr; 1452 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1453 PetscInt i,n = A->rmap->n,j; 1454 PetscInt nz; 1455 PetscScalar *x,*tmp,s1; 1456 const MatScalar *aa = a->a,*v; 1457 const PetscScalar *b; 1458 1459 PetscFunctionBegin; 1460 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1461 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1462 tmp = a->solve_work; 1463 1464 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1465 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1466 1467 /* copy the b into temp work space according to permutation */ 1468 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1469 1470 /* forward solve the U^T */ 1471 for (i=0; i<n; i++) { 1472 v = aa + adiag[i+1] + 1; 1473 vi = aj + adiag[i+1] + 1; 1474 nz = adiag[i] - adiag[i+1] - 1; 1475 s1 = tmp[i]; 1476 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1477 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1478 tmp[i] = s1; 1479 } 1480 1481 /* backward solve the L^T */ 1482 for (i=n-1; i>=0; i--) { 1483 v = aa + ai[i]; 1484 vi = aj + ai[i]; 1485 nz = ai[i+1] - ai[i]; 1486 s1 = tmp[i]; 1487 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1488 } 1489 1490 /* copy tmp into x according to permutation */ 1491 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1492 1493 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1494 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1495 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1496 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1497 1498 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } 1501 1502 #undef __FUNCT__ 1503 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1504 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1505 { 1506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1507 IS iscol = a->col,isrow = a->row; 1508 PetscErrorCode ierr; 1509 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1510 PetscInt i,n = A->rmap->n,j; 1511 PetscInt nz; 1512 PetscScalar *x,*tmp,s1; 1513 const MatScalar *aa = a->a,*v; 1514 const PetscScalar *b; 1515 1516 PetscFunctionBegin; 1517 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1518 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1519 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1520 tmp = a->solve_work; 1521 1522 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1523 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1524 1525 /* copy the b into temp work space according to permutation */ 1526 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1527 1528 /* forward solve the U^T */ 1529 for (i=0; i<n; i++) { 1530 v = aa + diag[i]; 1531 vi = aj + diag[i] + 1; 1532 nz = ai[i+1] - diag[i] - 1; 1533 s1 = tmp[i]; 1534 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1535 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1536 tmp[i] = s1; 1537 } 1538 1539 /* backward solve the L^T */ 1540 for (i=n-1; i>=0; i--) { 1541 v = aa + diag[i] - 1; 1542 vi = aj + diag[i] - 1; 1543 nz = diag[i] - ai[i]; 1544 s1 = tmp[i]; 1545 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1546 } 1547 1548 /* copy tmp into x according to permutation */ 1549 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1550 1551 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1552 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1553 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1554 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1555 1556 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 #undef __FUNCT__ 1561 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1562 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1563 { 1564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1565 IS iscol = a->col,isrow = a->row; 1566 PetscErrorCode ierr; 1567 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1568 PetscInt i,n = A->rmap->n,j; 1569 PetscInt nz; 1570 PetscScalar *x,*tmp,s1; 1571 const MatScalar *aa = a->a,*v; 1572 const PetscScalar *b; 1573 1574 PetscFunctionBegin; 1575 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1576 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1577 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1578 tmp = a->solve_work; 1579 1580 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1581 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1582 1583 /* copy the b into temp work space according to permutation */ 1584 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1585 1586 /* forward solve the U^T */ 1587 for (i=0; i<n; i++) { 1588 v = aa + adiag[i+1] + 1; 1589 vi = aj + adiag[i+1] + 1; 1590 nz = adiag[i] - adiag[i+1] - 1; 1591 s1 = tmp[i]; 1592 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1593 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1594 tmp[i] = s1; 1595 } 1596 1597 1598 /* backward solve the L^T */ 1599 for (i=n-1; i>=0; i--) { 1600 v = aa + ai[i]; 1601 vi = aj + ai[i]; 1602 nz = ai[i+1] - ai[i]; 1603 s1 = tmp[i]; 1604 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1605 } 1606 1607 /* copy tmp into x according to permutation */ 1608 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1609 1610 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1611 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1612 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1613 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1614 1615 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 /* ----------------------------------------------------------------*/ 1620 1621 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1622 1623 /* 1624 ilu() under revised new data structure. 1625 Factored arrays bj and ba are stored as 1626 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1627 1628 bi=fact->i is an array of size n+1, in which 1629 bi+ 1630 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1631 bi[n]: points to L(n-1,n-1)+1 1632 1633 bdiag=fact->diag is an array of size n+1,in which 1634 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1635 bdiag[n]: points to entry of U(n-1,0)-1 1636 1637 U(i,:) contains bdiag[i] as its last entry, i.e., 1638 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1639 */ 1640 #undef __FUNCT__ 1641 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1642 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1643 { 1644 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1645 PetscErrorCode ierr; 1646 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1647 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1648 IS isicol; 1649 1650 PetscFunctionBegin; 1651 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1652 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1653 b = (Mat_SeqAIJ*)(fact)->data; 1654 1655 /* allocate matrix arrays for new data structure */ 1656 ierr = PetscMalloc3(ai[n]+1,&b->a,ai[n]+1,&b->j,n+1,&b->i);CHKERRQ(ierr); 1657 ierr = PetscLogObjectMemory((PetscObject)fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1658 1659 b->singlemalloc = PETSC_TRUE; 1660 if (!b->diag) { 1661 ierr = PetscMalloc1((n+1),&b->diag);CHKERRQ(ierr); 1662 ierr = PetscLogObjectMemory((PetscObject)fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1663 } 1664 bdiag = b->diag; 1665 1666 if (n > 0) { 1667 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1668 } 1669 1670 /* set bi and bj with new data structure */ 1671 bi = b->i; 1672 bj = b->j; 1673 1674 /* L part */ 1675 bi[0] = 0; 1676 for (i=0; i<n; i++) { 1677 nz = adiag[i] - ai[i]; 1678 bi[i+1] = bi[i] + nz; 1679 aj = a->j + ai[i]; 1680 for (j=0; j<nz; j++) { 1681 /* *bj = aj[j]; bj++; */ 1682 bj[k++] = aj[j]; 1683 } 1684 } 1685 1686 /* U part */ 1687 bdiag[n] = bi[n]-1; 1688 for (i=n-1; i>=0; i--) { 1689 nz = ai[i+1] - adiag[i] - 1; 1690 aj = a->j + adiag[i] + 1; 1691 for (j=0; j<nz; j++) { 1692 /* *bj = aj[j]; bj++; */ 1693 bj[k++] = aj[j]; 1694 } 1695 /* diag[i] */ 1696 /* *bj = i; bj++; */ 1697 bj[k++] = i; 1698 bdiag[i] = bdiag[i+1] + nz + 1; 1699 } 1700 1701 fact->factortype = MAT_FACTOR_ILU; 1702 fact->info.factor_mallocs = 0; 1703 fact->info.fill_ratio_given = info->fill; 1704 fact->info.fill_ratio_needed = 1.0; 1705 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1706 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1707 1708 b = (Mat_SeqAIJ*)(fact)->data; 1709 b->row = isrow; 1710 b->col = iscol; 1711 b->icol = isicol; 1712 ierr = PetscMalloc1((fact->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1713 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1714 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1720 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1721 { 1722 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1723 IS isicol; 1724 PetscErrorCode ierr; 1725 const PetscInt *r,*ic; 1726 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1727 PetscInt *bi,*cols,nnz,*cols_lvl; 1728 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1729 PetscInt i,levels,diagonal_fill; 1730 PetscBool col_identity,row_identity,missing; 1731 PetscReal f; 1732 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1733 PetscBT lnkbt; 1734 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1735 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1736 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1737 1738 PetscFunctionBegin; 1739 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1740 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1741 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1742 1743 levels = (PetscInt)info->levels; 1744 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1745 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1746 if (!levels && row_identity && col_identity) { 1747 /* special case: ilu(0) with natural ordering */ 1748 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1749 if (a->inode.size) { 1750 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1751 } 1752 PetscFunctionReturn(0); 1753 } 1754 1755 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1756 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1757 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1758 1759 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1760 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1761 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1762 bi[0] = bdiag[0] = 0; 1763 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1764 1765 /* create a linked list for storing column indices of the active row */ 1766 nlnk = n + 1; 1767 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1768 1769 /* initial FreeSpace size is f*(ai[n]+1) */ 1770 f = info->fill; 1771 diagonal_fill = (PetscInt)info->diagonal_fill; 1772 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1773 current_space = free_space; 1774 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1775 current_space_lvl = free_space_lvl; 1776 for (i=0; i<n; i++) { 1777 nzi = 0; 1778 /* copy current row into linked list */ 1779 nnz = ai[r[i]+1] - ai[r[i]]; 1780 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1781 cols = aj + ai[r[i]]; 1782 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1783 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1784 nzi += nlnk; 1785 1786 /* make sure diagonal entry is included */ 1787 if (diagonal_fill && lnk[i] == -1) { 1788 fm = n; 1789 while (lnk[fm] < i) fm = lnk[fm]; 1790 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1791 lnk[fm] = i; 1792 lnk_lvl[i] = 0; 1793 nzi++; dcount++; 1794 } 1795 1796 /* add pivot rows into the active row */ 1797 nzbd = 0; 1798 prow = lnk[n]; 1799 while (prow < i) { 1800 nnz = bdiag[prow]; 1801 cols = bj_ptr[prow] + nnz + 1; 1802 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1803 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1804 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1805 nzi += nlnk; 1806 prow = lnk[prow]; 1807 nzbd++; 1808 } 1809 bdiag[i] = nzbd; 1810 bi[i+1] = bi[i] + nzi; 1811 /* if free space is not available, make more free space */ 1812 if (current_space->local_remaining<nzi) { 1813 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1814 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1815 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1816 reallocs++; 1817 } 1818 1819 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1820 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1821 bj_ptr[i] = current_space->array; 1822 bjlvl_ptr[i] = current_space_lvl->array; 1823 1824 /* make sure the active row i has diagonal entry */ 1825 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1826 1827 current_space->array += nzi; 1828 current_space->local_used += nzi; 1829 current_space->local_remaining -= nzi; 1830 current_space_lvl->array += nzi; 1831 current_space_lvl->local_used += nzi; 1832 current_space_lvl->local_remaining -= nzi; 1833 } 1834 1835 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1836 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1837 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1838 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 1839 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1840 1841 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1842 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1843 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1844 1845 #if defined(PETSC_USE_INFO) 1846 { 1847 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1848 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 1849 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 1850 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 1851 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1852 if (diagonal_fill) { 1853 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1854 } 1855 } 1856 #endif 1857 /* put together the new matrix */ 1858 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 1859 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 1860 b = (Mat_SeqAIJ*)(fact)->data; 1861 1862 b->free_a = PETSC_TRUE; 1863 b->free_ij = PETSC_TRUE; 1864 b->singlemalloc = PETSC_FALSE; 1865 1866 ierr = PetscMalloc1((bdiag[0]+1),&b->a);CHKERRQ(ierr); 1867 1868 b->j = bj; 1869 b->i = bi; 1870 b->diag = bdiag; 1871 b->ilen = 0; 1872 b->imax = 0; 1873 b->row = isrow; 1874 b->col = iscol; 1875 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1876 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1877 b->icol = isicol; 1878 1879 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 1880 /* In b structure: Free imax, ilen, old a, old j. 1881 Allocate bdiag, solve_work, new a, new j */ 1882 ierr = PetscLogObjectMemory((PetscObject)fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1883 b->maxnz = b->nz = bdiag[0]+1; 1884 1885 (fact)->info.factor_mallocs = reallocs; 1886 (fact)->info.fill_ratio_given = f; 1887 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1888 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1889 if (a->inode.size) { 1890 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1891 } 1892 ierr = Mat_CheckInode_FactorLU(fact);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 #undef __FUNCT__ 1897 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1898 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1899 { 1900 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1901 IS isicol; 1902 PetscErrorCode ierr; 1903 const PetscInt *r,*ic; 1904 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1905 PetscInt *bi,*cols,nnz,*cols_lvl; 1906 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1907 PetscInt i,levels,diagonal_fill; 1908 PetscBool col_identity,row_identity; 1909 PetscReal f; 1910 PetscInt nlnk,*lnk,*lnk_lvl=NULL; 1911 PetscBT lnkbt; 1912 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1913 PetscFreeSpaceList free_space =NULL,current_space=NULL; 1914 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 1915 PetscBool missing; 1916 1917 PetscFunctionBegin; 1918 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1919 f = info->fill; 1920 levels = (PetscInt)info->levels; 1921 diagonal_fill = (PetscInt)info->diagonal_fill; 1922 1923 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1924 1925 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1926 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1927 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1928 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1929 1930 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1931 if (a->inode.size) { 1932 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1933 } 1934 fact->factortype = MAT_FACTOR_ILU; 1935 (fact)->info.factor_mallocs = 0; 1936 (fact)->info.fill_ratio_given = info->fill; 1937 (fact)->info.fill_ratio_needed = 1.0; 1938 1939 b = (Mat_SeqAIJ*)(fact)->data; 1940 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1941 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1942 b->row = isrow; 1943 b->col = iscol; 1944 b->icol = isicol; 1945 ierr = PetscMalloc1(((fact)->rmap->n+1),&b->solve_work);CHKERRQ(ierr); 1946 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1947 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1948 PetscFunctionReturn(0); 1949 } 1950 1951 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1952 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1953 1954 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1955 ierr = PetscMalloc1((n+1),&bi);CHKERRQ(ierr); 1956 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); 1957 bi[0] = bdiag[0] = 0; 1958 1959 ierr = PetscMalloc2(n,&bj_ptr,n,&bjlvl_ptr);CHKERRQ(ierr); 1960 1961 /* create a linked list for storing column indices of the active row */ 1962 nlnk = n + 1; 1963 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1964 1965 /* initial FreeSpace size is f*(ai[n]+1) */ 1966 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1967 current_space = free_space; 1968 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1969 current_space_lvl = free_space_lvl; 1970 1971 for (i=0; i<n; i++) { 1972 nzi = 0; 1973 /* copy current row into linked list */ 1974 nnz = ai[r[i]+1] - ai[r[i]]; 1975 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1976 cols = aj + ai[r[i]]; 1977 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1978 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1979 nzi += nlnk; 1980 1981 /* make sure diagonal entry is included */ 1982 if (diagonal_fill && lnk[i] == -1) { 1983 fm = n; 1984 while (lnk[fm] < i) fm = lnk[fm]; 1985 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1986 lnk[fm] = i; 1987 lnk_lvl[i] = 0; 1988 nzi++; dcount++; 1989 } 1990 1991 /* add pivot rows into the active row */ 1992 nzbd = 0; 1993 prow = lnk[n]; 1994 while (prow < i) { 1995 nnz = bdiag[prow]; 1996 cols = bj_ptr[prow] + nnz + 1; 1997 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1998 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1999 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 2000 nzi += nlnk; 2001 prow = lnk[prow]; 2002 nzbd++; 2003 } 2004 bdiag[i] = nzbd; 2005 bi[i+1] = bi[i] + nzi; 2006 2007 /* if free space is not available, make more free space */ 2008 if (current_space->local_remaining<nzi) { 2009 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2010 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2011 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2012 reallocs++; 2013 } 2014 2015 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2016 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2017 bj_ptr[i] = current_space->array; 2018 bjlvl_ptr[i] = current_space_lvl->array; 2019 2020 /* make sure the active row i has diagonal entry */ 2021 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2022 2023 current_space->array += nzi; 2024 current_space->local_used += nzi; 2025 current_space->local_remaining -= nzi; 2026 current_space_lvl->array += nzi; 2027 current_space_lvl->local_used += nzi; 2028 current_space_lvl->local_remaining -= nzi; 2029 } 2030 2031 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2032 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2033 2034 /* destroy list of free space and other temporary arrays */ 2035 ierr = PetscMalloc1((bi[n]+1),&bj);CHKERRQ(ierr); 2036 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2037 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2038 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2039 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2040 2041 #if defined(PETSC_USE_INFO) 2042 { 2043 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2044 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr); 2045 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2046 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%g);\n",(double)af);CHKERRQ(ierr); 2047 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2048 if (diagonal_fill) { 2049 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2050 } 2051 } 2052 #endif 2053 2054 /* put together the new matrix */ 2055 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 2056 ierr = PetscLogObjectParent((PetscObject)fact,(PetscObject)isicol);CHKERRQ(ierr); 2057 b = (Mat_SeqAIJ*)(fact)->data; 2058 2059 b->free_a = PETSC_TRUE; 2060 b->free_ij = PETSC_TRUE; 2061 b->singlemalloc = PETSC_FALSE; 2062 2063 ierr = PetscMalloc1(bi[n],&b->a);CHKERRQ(ierr); 2064 b->j = bj; 2065 b->i = bi; 2066 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2067 b->diag = bdiag; 2068 b->ilen = 0; 2069 b->imax = 0; 2070 b->row = isrow; 2071 b->col = iscol; 2072 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2073 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2074 b->icol = isicol; 2075 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 2076 /* In b structure: Free imax, ilen, old a, old j. 2077 Allocate bdiag, solve_work, new a, new j */ 2078 ierr = PetscLogObjectMemory((PetscObject)fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2079 b->maxnz = b->nz = bi[n]; 2080 2081 (fact)->info.factor_mallocs = reallocs; 2082 (fact)->info.fill_ratio_given = f; 2083 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2084 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2085 if (a->inode.size) { 2086 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2087 } 2088 PetscFunctionReturn(0); 2089 } 2090 2091 #undef __FUNCT__ 2092 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2093 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2094 { 2095 Mat C = B; 2096 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2097 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2098 IS ip=b->row,iip = b->icol; 2099 PetscErrorCode ierr; 2100 const PetscInt *rip,*riip; 2101 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2102 PetscInt *ai=a->i,*aj=a->j; 2103 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2104 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2105 PetscBool perm_identity; 2106 FactorShiftCtx sctx; 2107 PetscReal rs; 2108 MatScalar d,*v; 2109 2110 PetscFunctionBegin; 2111 /* MatPivotSetUp(): initialize shift context sctx */ 2112 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2113 2114 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2115 sctx.shift_top = info->zeropivot; 2116 for (i=0; i<mbs; i++) { 2117 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2118 d = (aa)[a->diag[i]]; 2119 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2120 v = aa+ai[i]; 2121 nz = ai[i+1] - ai[i]; 2122 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2123 if (rs>sctx.shift_top) sctx.shift_top = rs; 2124 } 2125 sctx.shift_top *= 1.1; 2126 sctx.nshift_max = 5; 2127 sctx.shift_lo = 0.; 2128 sctx.shift_hi = 1.; 2129 } 2130 2131 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2132 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2133 2134 /* allocate working arrays 2135 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2136 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2137 */ 2138 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);CHKERRQ(ierr); 2139 2140 do { 2141 sctx.newshift = PETSC_FALSE; 2142 2143 for (i=0; i<mbs; i++) c2r[i] = mbs; 2144 if (mbs) il[0] = 0; 2145 2146 for (k = 0; k<mbs; k++) { 2147 /* zero rtmp */ 2148 nz = bi[k+1] - bi[k]; 2149 bjtmp = bj + bi[k]; 2150 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2151 2152 /* load in initial unfactored row */ 2153 bval = ba + bi[k]; 2154 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2155 for (j = jmin; j < jmax; j++) { 2156 col = riip[aj[j]]; 2157 if (col >= k) { /* only take upper triangular entry */ 2158 rtmp[col] = aa[j]; 2159 *bval++ = 0.0; /* for in-place factorization */ 2160 } 2161 } 2162 /* shift the diagonal of the matrix: ZeropivotApply() */ 2163 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2164 2165 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2166 dk = rtmp[k]; 2167 i = c2r[k]; /* first row to be added to k_th row */ 2168 2169 while (i < k) { 2170 nexti = c2r[i]; /* next row to be added to k_th row */ 2171 2172 /* compute multiplier, update diag(k) and U(i,k) */ 2173 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2174 uikdi = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2175 dk += uikdi*ba[ili]; /* update diag[k] */ 2176 ba[ili] = uikdi; /* -U(i,k) */ 2177 2178 /* add multiple of row i to k-th row */ 2179 jmin = ili + 1; jmax = bi[i+1]; 2180 if (jmin < jmax) { 2181 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2182 /* update il and c2r for row i */ 2183 il[i] = jmin; 2184 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2185 } 2186 i = nexti; 2187 } 2188 2189 /* copy data into U(k,:) */ 2190 rs = 0.0; 2191 jmin = bi[k]; jmax = bi[k+1]-1; 2192 if (jmin < jmax) { 2193 for (j=jmin; j<jmax; j++) { 2194 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2195 } 2196 /* add the k-th row into il and c2r */ 2197 il[k] = jmin; 2198 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2199 } 2200 2201 /* MatPivotCheck() */ 2202 sctx.rs = rs; 2203 sctx.pv = dk; 2204 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2205 if (sctx.newshift) break; 2206 dk = sctx.pv; 2207 2208 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2209 } 2210 } while (sctx.newshift); 2211 2212 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2213 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2214 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2215 2216 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2217 if (perm_identity) { 2218 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2219 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2220 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2221 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2222 } else { 2223 B->ops->solve = MatSolve_SeqSBAIJ_1; 2224 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2225 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2226 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2227 } 2228 2229 C->assembled = PETSC_TRUE; 2230 C->preallocated = PETSC_TRUE; 2231 2232 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2233 2234 /* MatPivotView() */ 2235 if (sctx.nshift) { 2236 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2237 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);CHKERRQ(ierr); 2238 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2239 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2240 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2241 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);CHKERRQ(ierr); 2242 } 2243 } 2244 PetscFunctionReturn(0); 2245 } 2246 2247 #undef __FUNCT__ 2248 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2249 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2250 { 2251 Mat C = B; 2252 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2253 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2254 IS ip=b->row,iip = b->icol; 2255 PetscErrorCode ierr; 2256 const PetscInt *rip,*riip; 2257 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2258 PetscInt *ai=a->i,*aj=a->j; 2259 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2260 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2261 PetscBool perm_identity; 2262 FactorShiftCtx sctx; 2263 PetscReal rs; 2264 MatScalar d,*v; 2265 2266 PetscFunctionBegin; 2267 /* MatPivotSetUp(): initialize shift context sctx */ 2268 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2269 2270 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2271 sctx.shift_top = info->zeropivot; 2272 for (i=0; i<mbs; i++) { 2273 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2274 d = (aa)[a->diag[i]]; 2275 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2276 v = aa+ai[i]; 2277 nz = ai[i+1] - ai[i]; 2278 for (j=0; j<nz; j++) rs += PetscAbsScalar(v[j]); 2279 if (rs>sctx.shift_top) sctx.shift_top = rs; 2280 } 2281 sctx.shift_top *= 1.1; 2282 sctx.nshift_max = 5; 2283 sctx.shift_lo = 0.; 2284 sctx.shift_hi = 1.; 2285 } 2286 2287 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2288 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2289 2290 /* initialization */ 2291 ierr = PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);CHKERRQ(ierr); 2292 2293 do { 2294 sctx.newshift = PETSC_FALSE; 2295 2296 for (i=0; i<mbs; i++) jl[i] = mbs; 2297 il[0] = 0; 2298 2299 for (k = 0; k<mbs; k++) { 2300 /* zero rtmp */ 2301 nz = bi[k+1] - bi[k]; 2302 bjtmp = bj + bi[k]; 2303 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2304 2305 bval = ba + bi[k]; 2306 /* initialize k-th row by the perm[k]-th row of A */ 2307 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2308 for (j = jmin; j < jmax; j++) { 2309 col = riip[aj[j]]; 2310 if (col >= k) { /* only take upper triangular entry */ 2311 rtmp[col] = aa[j]; 2312 *bval++ = 0.0; /* for in-place factorization */ 2313 } 2314 } 2315 /* shift the diagonal of the matrix */ 2316 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2317 2318 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2319 dk = rtmp[k]; 2320 i = jl[k]; /* first row to be added to k_th row */ 2321 2322 while (i < k) { 2323 nexti = jl[i]; /* next row to be added to k_th row */ 2324 2325 /* compute multiplier, update diag(k) and U(i,k) */ 2326 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2327 uikdi = -ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2328 dk += uikdi*ba[ili]; 2329 ba[ili] = uikdi; /* -U(i,k) */ 2330 2331 /* add multiple of row i to k-th row */ 2332 jmin = ili + 1; jmax = bi[i+1]; 2333 if (jmin < jmax) { 2334 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2335 /* update il and jl for row i */ 2336 il[i] = jmin; 2337 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2338 } 2339 i = nexti; 2340 } 2341 2342 /* shift the diagonals when zero pivot is detected */ 2343 /* compute rs=sum of abs(off-diagonal) */ 2344 rs = 0.0; 2345 jmin = bi[k]+1; 2346 nz = bi[k+1] - jmin; 2347 bcol = bj + jmin; 2348 for (j=0; j<nz; j++) { 2349 rs += PetscAbsScalar(rtmp[bcol[j]]); 2350 } 2351 2352 sctx.rs = rs; 2353 sctx.pv = dk; 2354 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2355 if (sctx.newshift) break; 2356 dk = sctx.pv; 2357 2358 /* copy data into U(k,:) */ 2359 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2360 jmin = bi[k]+1; jmax = bi[k+1]; 2361 if (jmin < jmax) { 2362 for (j=jmin; j<jmax; j++) { 2363 col = bj[j]; ba[j] = rtmp[col]; 2364 } 2365 /* add the k-th row into il and jl */ 2366 il[k] = jmin; 2367 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2368 } 2369 } 2370 } while (sctx.newshift); 2371 2372 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2373 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2374 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2375 2376 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2377 if (perm_identity) { 2378 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2379 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2380 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2381 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2382 } else { 2383 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2384 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2385 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2386 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2387 } 2388 2389 C->assembled = PETSC_TRUE; 2390 C->preallocated = PETSC_TRUE; 2391 2392 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2393 if (sctx.nshift) { 2394 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2395 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2396 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2397 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);CHKERRQ(ierr); 2398 } 2399 } 2400 PetscFunctionReturn(0); 2401 } 2402 2403 /* 2404 icc() under revised new data structure. 2405 Factored arrays bj and ba are stored as 2406 U(0,:),...,U(i,:),U(n-1,:) 2407 2408 ui=fact->i is an array of size n+1, in which 2409 ui+ 2410 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2411 ui[n]: points to U(n-1,n-1)+1 2412 2413 udiag=fact->diag is an array of size n,in which 2414 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2415 2416 U(i,:) contains udiag[i] as its last entry, i.e., 2417 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2418 */ 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2422 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2423 { 2424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2425 Mat_SeqSBAIJ *b; 2426 PetscErrorCode ierr; 2427 PetscBool perm_identity,missing; 2428 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2429 const PetscInt *rip,*riip; 2430 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2431 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2432 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2433 PetscReal fill =info->fill,levels=info->levels; 2434 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2435 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2436 PetscBT lnkbt; 2437 IS iperm; 2438 2439 PetscFunctionBegin; 2440 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2441 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2442 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2443 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2444 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2445 2446 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2447 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2448 ui[0] = 0; 2449 2450 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2451 if (!levels && perm_identity) { 2452 for (i=0; i<am; i++) { 2453 ncols = ai[i+1] - a->diag[i]; 2454 ui[i+1] = ui[i] + ncols; 2455 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2456 } 2457 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2458 cols = uj; 2459 for (i=0; i<am; i++) { 2460 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2461 ncols = ai[i+1] - a->diag[i] -1; 2462 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2463 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2464 } 2465 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2466 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2467 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2468 2469 /* initialization */ 2470 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2471 2472 /* jl: linked list for storing indices of the pivot rows 2473 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2474 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2475 for (i=0; i<am; i++) { 2476 jl[i] = am; il[i] = 0; 2477 } 2478 2479 /* create and initialize a linked list for storing column indices of the active row k */ 2480 nlnk = am + 1; 2481 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2482 2483 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2484 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2485 current_space = free_space; 2486 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2487 current_space_lvl = free_space_lvl; 2488 2489 for (k=0; k<am; k++) { /* for each active row k */ 2490 /* initialize lnk by the column indices of row rip[k] of A */ 2491 nzk = 0; 2492 ncols = ai[rip[k]+1] - ai[rip[k]]; 2493 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2494 ncols_upper = 0; 2495 for (j=0; j<ncols; j++) { 2496 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2497 if (riip[i] >= k) { /* only take upper triangular entry */ 2498 ajtmp[ncols_upper] = i; 2499 ncols_upper++; 2500 } 2501 } 2502 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2503 nzk += nlnk; 2504 2505 /* update lnk by computing fill-in for each pivot row to be merged in */ 2506 prow = jl[k]; /* 1st pivot row */ 2507 2508 while (prow < k) { 2509 nextprow = jl[prow]; 2510 2511 /* merge prow into k-th row */ 2512 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2513 jmax = ui[prow+1]; 2514 ncols = jmax-jmin; 2515 i = jmin - ui[prow]; 2516 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2517 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2518 j = *(uj - 1); 2519 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2520 nzk += nlnk; 2521 2522 /* update il and jl for prow */ 2523 if (jmin < jmax) { 2524 il[prow] = jmin; 2525 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2526 } 2527 prow = nextprow; 2528 } 2529 2530 /* if free space is not available, make more free space */ 2531 if (current_space->local_remaining<nzk) { 2532 i = am - k + 1; /* num of unfactored rows */ 2533 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2534 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2535 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2536 reallocs++; 2537 } 2538 2539 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2540 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2541 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2542 2543 /* add the k-th row into il and jl */ 2544 if (nzk > 1) { 2545 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2546 jl[k] = jl[i]; jl[i] = k; 2547 il[k] = ui[k] + 1; 2548 } 2549 uj_ptr[k] = current_space->array; 2550 uj_lvl_ptr[k] = current_space_lvl->array; 2551 2552 current_space->array += nzk; 2553 current_space->local_used += nzk; 2554 current_space->local_remaining -= nzk; 2555 2556 current_space_lvl->array += nzk; 2557 current_space_lvl->local_used += nzk; 2558 current_space_lvl->local_remaining -= nzk; 2559 2560 ui[k+1] = ui[k] + nzk; 2561 } 2562 2563 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2564 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2565 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2566 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2567 2568 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2569 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2570 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2571 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2572 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2573 2574 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2575 2576 /* put together the new matrix in MATSEQSBAIJ format */ 2577 b = (Mat_SeqSBAIJ*)(fact)->data; 2578 b->singlemalloc = PETSC_FALSE; 2579 2580 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2581 2582 b->j = uj; 2583 b->i = ui; 2584 b->diag = udiag; 2585 b->free_diag = PETSC_TRUE; 2586 b->ilen = 0; 2587 b->imax = 0; 2588 b->row = perm; 2589 b->col = perm; 2590 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2591 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2592 b->icol = iperm; 2593 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2594 2595 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2596 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2597 2598 b->maxnz = b->nz = ui[am]; 2599 b->free_a = PETSC_TRUE; 2600 b->free_ij = PETSC_TRUE; 2601 2602 fact->info.factor_mallocs = reallocs; 2603 fact->info.fill_ratio_given = fill; 2604 if (ai[am] != 0) { 2605 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2606 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2607 } else { 2608 fact->info.fill_ratio_needed = 0.0; 2609 } 2610 #if defined(PETSC_USE_INFO) 2611 if (ai[am] != 0) { 2612 PetscReal af = fact->info.fill_ratio_needed; 2613 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2614 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2615 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2616 } else { 2617 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2618 } 2619 #endif 2620 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2626 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2627 { 2628 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2629 Mat_SeqSBAIJ *b; 2630 PetscErrorCode ierr; 2631 PetscBool perm_identity,missing; 2632 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2633 const PetscInt *rip,*riip; 2634 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2635 PetscInt nlnk,*lnk,*lnk_lvl=NULL,d; 2636 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2637 PetscReal fill =info->fill,levels=info->levels; 2638 PetscFreeSpaceList free_space =NULL,current_space=NULL; 2639 PetscFreeSpaceList free_space_lvl=NULL,current_space_lvl=NULL; 2640 PetscBT lnkbt; 2641 IS iperm; 2642 2643 PetscFunctionBegin; 2644 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2645 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2646 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2647 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2648 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2649 2650 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2651 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2652 ui[0] = 0; 2653 2654 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2655 if (!levels && perm_identity) { 2656 2657 for (i=0; i<am; i++) { 2658 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2659 udiag[i] = ui[i]; 2660 } 2661 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2662 cols = uj; 2663 for (i=0; i<am; i++) { 2664 aj = a->j + a->diag[i]; 2665 ncols = ui[i+1] - ui[i]; 2666 for (j=0; j<ncols; j++) *cols++ = *aj++; 2667 } 2668 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2669 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2670 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2671 2672 /* initialization */ 2673 ierr = PetscMalloc1((am+1),&ajtmp);CHKERRQ(ierr); 2674 2675 /* jl: linked list for storing indices of the pivot rows 2676 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2677 ierr = PetscMalloc4(am,&uj_ptr,am,&uj_lvl_ptr,am,&jl,am,&il);CHKERRQ(ierr); 2678 for (i=0; i<am; i++) { 2679 jl[i] = am; il[i] = 0; 2680 } 2681 2682 /* create and initialize a linked list for storing column indices of the active row k */ 2683 nlnk = am + 1; 2684 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2685 2686 /* initial FreeSpace size is fill*(ai[am]+1) */ 2687 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2688 current_space = free_space; 2689 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2690 current_space_lvl = free_space_lvl; 2691 2692 for (k=0; k<am; k++) { /* for each active row k */ 2693 /* initialize lnk by the column indices of row rip[k] of A */ 2694 nzk = 0; 2695 ncols = ai[rip[k]+1] - ai[rip[k]]; 2696 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2697 ncols_upper = 0; 2698 for (j=0; j<ncols; j++) { 2699 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2700 if (riip[i] >= k) { /* only take upper triangular entry */ 2701 ajtmp[ncols_upper] = i; 2702 ncols_upper++; 2703 } 2704 } 2705 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2706 nzk += nlnk; 2707 2708 /* update lnk by computing fill-in for each pivot row to be merged in */ 2709 prow = jl[k]; /* 1st pivot row */ 2710 2711 while (prow < k) { 2712 nextprow = jl[prow]; 2713 2714 /* merge prow into k-th row */ 2715 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2716 jmax = ui[prow+1]; 2717 ncols = jmax-jmin; 2718 i = jmin - ui[prow]; 2719 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2720 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2721 j = *(uj - 1); 2722 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2723 nzk += nlnk; 2724 2725 /* update il and jl for prow */ 2726 if (jmin < jmax) { 2727 il[prow] = jmin; 2728 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2729 } 2730 prow = nextprow; 2731 } 2732 2733 /* if free space is not available, make more free space */ 2734 if (current_space->local_remaining<nzk) { 2735 i = am - k + 1; /* num of unfactored rows */ 2736 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2737 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2738 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2739 reallocs++; 2740 } 2741 2742 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2743 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2744 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2745 2746 /* add the k-th row into il and jl */ 2747 if (nzk > 1) { 2748 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2749 jl[k] = jl[i]; jl[i] = k; 2750 il[k] = ui[k] + 1; 2751 } 2752 uj_ptr[k] = current_space->array; 2753 uj_lvl_ptr[k] = current_space_lvl->array; 2754 2755 current_space->array += nzk; 2756 current_space->local_used += nzk; 2757 current_space->local_remaining -= nzk; 2758 2759 current_space_lvl->array += nzk; 2760 current_space_lvl->local_used += nzk; 2761 current_space_lvl->local_remaining -= nzk; 2762 2763 ui[k+1] = ui[k] + nzk; 2764 } 2765 2766 #if defined(PETSC_USE_INFO) 2767 if (ai[am] != 0) { 2768 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2769 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2770 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2771 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2772 } else { 2773 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2774 } 2775 #endif 2776 2777 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2778 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2779 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2780 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2781 2782 /* destroy list of free space and other temporary array(s) */ 2783 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2784 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2785 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2786 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2787 2788 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2789 2790 /* put together the new matrix in MATSEQSBAIJ format */ 2791 2792 b = (Mat_SeqSBAIJ*)fact->data; 2793 b->singlemalloc = PETSC_FALSE; 2794 2795 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2796 2797 b->j = uj; 2798 b->i = ui; 2799 b->diag = udiag; 2800 b->free_diag = PETSC_TRUE; 2801 b->ilen = 0; 2802 b->imax = 0; 2803 b->row = perm; 2804 b->col = perm; 2805 2806 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2807 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2808 2809 b->icol = iperm; 2810 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2811 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2812 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2813 b->maxnz = b->nz = ui[am]; 2814 b->free_a = PETSC_TRUE; 2815 b->free_ij = PETSC_TRUE; 2816 2817 fact->info.factor_mallocs = reallocs; 2818 fact->info.fill_ratio_given = fill; 2819 if (ai[am] != 0) { 2820 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2821 } else { 2822 fact->info.fill_ratio_needed = 0.0; 2823 } 2824 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2825 PetscFunctionReturn(0); 2826 } 2827 2828 #undef __FUNCT__ 2829 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2830 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2831 { 2832 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2833 Mat_SeqSBAIJ *b; 2834 PetscErrorCode ierr; 2835 PetscBool perm_identity; 2836 PetscReal fill = info->fill; 2837 const PetscInt *rip,*riip; 2838 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2839 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2840 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2841 PetscFreeSpaceList free_space=NULL,current_space=NULL; 2842 PetscBT lnkbt; 2843 IS iperm; 2844 2845 PetscFunctionBegin; 2846 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2847 /* check whether perm is the identity mapping */ 2848 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2849 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2850 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2851 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2852 2853 /* initialization */ 2854 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 2855 ierr = PetscMalloc1((am+1),&udiag);CHKERRQ(ierr); 2856 ui[0] = 0; 2857 2858 /* jl: linked list for storing indices of the pivot rows 2859 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2860 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 2861 for (i=0; i<am; i++) { 2862 jl[i] = am; il[i] = 0; 2863 } 2864 2865 /* create and initialize a linked list for storing column indices of the active row k */ 2866 nlnk = am + 1; 2867 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2868 2869 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2870 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2871 current_space = free_space; 2872 2873 for (k=0; k<am; k++) { /* for each active row k */ 2874 /* initialize lnk by the column indices of row rip[k] of A */ 2875 nzk = 0; 2876 ncols = ai[rip[k]+1] - ai[rip[k]]; 2877 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2878 ncols_upper = 0; 2879 for (j=0; j<ncols; j++) { 2880 i = riip[*(aj + ai[rip[k]] + j)]; 2881 if (i >= k) { /* only take upper triangular entry */ 2882 cols[ncols_upper] = i; 2883 ncols_upper++; 2884 } 2885 } 2886 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2887 nzk += nlnk; 2888 2889 /* update lnk by computing fill-in for each pivot row to be merged in */ 2890 prow = jl[k]; /* 1st pivot row */ 2891 2892 while (prow < k) { 2893 nextprow = jl[prow]; 2894 /* merge prow into k-th row */ 2895 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2896 jmax = ui[prow+1]; 2897 ncols = jmax-jmin; 2898 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2899 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2900 nzk += nlnk; 2901 2902 /* update il and jl for prow */ 2903 if (jmin < jmax) { 2904 il[prow] = jmin; 2905 j = *uj_ptr; 2906 jl[prow] = jl[j]; 2907 jl[j] = prow; 2908 } 2909 prow = nextprow; 2910 } 2911 2912 /* if free space is not available, make more free space */ 2913 if (current_space->local_remaining<nzk) { 2914 i = am - k + 1; /* num of unfactored rows */ 2915 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2916 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2917 reallocs++; 2918 } 2919 2920 /* copy data into free space, then initialize lnk */ 2921 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2922 2923 /* add the k-th row into il and jl */ 2924 if (nzk > 1) { 2925 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2926 jl[k] = jl[i]; jl[i] = k; 2927 il[k] = ui[k] + 1; 2928 } 2929 ui_ptr[k] = current_space->array; 2930 2931 current_space->array += nzk; 2932 current_space->local_used += nzk; 2933 current_space->local_remaining -= nzk; 2934 2935 ui[k+1] = ui[k] + nzk; 2936 } 2937 2938 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2939 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2940 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2941 2942 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2943 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 2944 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2945 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2946 2947 /* put together the new matrix in MATSEQSBAIJ format */ 2948 2949 b = (Mat_SeqSBAIJ*)fact->data; 2950 b->singlemalloc = PETSC_FALSE; 2951 b->free_a = PETSC_TRUE; 2952 b->free_ij = PETSC_TRUE; 2953 2954 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 2955 2956 b->j = uj; 2957 b->i = ui; 2958 b->diag = udiag; 2959 b->free_diag = PETSC_TRUE; 2960 b->ilen = 0; 2961 b->imax = 0; 2962 b->row = perm; 2963 b->col = perm; 2964 2965 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2966 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2967 2968 b->icol = iperm; 2969 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2970 2971 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 2972 ierr = PetscLogObjectMemory((PetscObject)fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2973 2974 b->maxnz = b->nz = ui[am]; 2975 2976 fact->info.factor_mallocs = reallocs; 2977 fact->info.fill_ratio_given = fill; 2978 if (ai[am] != 0) { 2979 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2980 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2981 } else { 2982 fact->info.fill_ratio_needed = 0.0; 2983 } 2984 #if defined(PETSC_USE_INFO) 2985 if (ai[am] != 0) { 2986 PetscReal af = fact->info.fill_ratio_needed; 2987 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 2988 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 2989 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 2990 } else { 2991 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2992 } 2993 #endif 2994 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2995 PetscFunctionReturn(0); 2996 } 2997 2998 #undef __FUNCT__ 2999 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 3000 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 3001 { 3002 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3003 Mat_SeqSBAIJ *b; 3004 PetscErrorCode ierr; 3005 PetscBool perm_identity; 3006 PetscReal fill = info->fill; 3007 const PetscInt *rip,*riip; 3008 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 3009 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 3010 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 3011 PetscFreeSpaceList free_space=NULL,current_space=NULL; 3012 PetscBT lnkbt; 3013 IS iperm; 3014 3015 PetscFunctionBegin; 3016 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 3017 /* check whether perm is the identity mapping */ 3018 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 3019 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 3020 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 3021 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 3022 3023 /* initialization */ 3024 ierr = PetscMalloc1((am+1),&ui);CHKERRQ(ierr); 3025 ui[0] = 0; 3026 3027 /* jl: linked list for storing indices of the pivot rows 3028 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3029 ierr = PetscMalloc4(am,&ui_ptr,am,&jl,am,&il,am,&cols);CHKERRQ(ierr); 3030 for (i=0; i<am; i++) { 3031 jl[i] = am; il[i] = 0; 3032 } 3033 3034 /* create and initialize a linked list for storing column indices of the active row k */ 3035 nlnk = am + 1; 3036 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3037 3038 /* initial FreeSpace size is fill*(ai[am]+1) */ 3039 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3040 current_space = free_space; 3041 3042 for (k=0; k<am; k++) { /* for each active row k */ 3043 /* initialize lnk by the column indices of row rip[k] of A */ 3044 nzk = 0; 3045 ncols = ai[rip[k]+1] - ai[rip[k]]; 3046 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3047 ncols_upper = 0; 3048 for (j=0; j<ncols; j++) { 3049 i = riip[*(aj + ai[rip[k]] + j)]; 3050 if (i >= k) { /* only take upper triangular entry */ 3051 cols[ncols_upper] = i; 3052 ncols_upper++; 3053 } 3054 } 3055 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3056 nzk += nlnk; 3057 3058 /* update lnk by computing fill-in for each pivot row to be merged in */ 3059 prow = jl[k]; /* 1st pivot row */ 3060 3061 while (prow < k) { 3062 nextprow = jl[prow]; 3063 /* merge prow into k-th row */ 3064 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3065 jmax = ui[prow+1]; 3066 ncols = jmax-jmin; 3067 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3068 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3069 nzk += nlnk; 3070 3071 /* update il and jl for prow */ 3072 if (jmin < jmax) { 3073 il[prow] = jmin; 3074 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3075 } 3076 prow = nextprow; 3077 } 3078 3079 /* if free space is not available, make more free space */ 3080 if (current_space->local_remaining<nzk) { 3081 i = am - k + 1; /* num of unfactored rows */ 3082 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3083 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3084 reallocs++; 3085 } 3086 3087 /* copy data into free space, then initialize lnk */ 3088 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3089 3090 /* add the k-th row into il and jl */ 3091 if (nzk-1 > 0) { 3092 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3093 jl[k] = jl[i]; jl[i] = k; 3094 il[k] = ui[k] + 1; 3095 } 3096 ui_ptr[k] = current_space->array; 3097 3098 current_space->array += nzk; 3099 current_space->local_used += nzk; 3100 current_space->local_remaining -= nzk; 3101 3102 ui[k+1] = ui[k] + nzk; 3103 } 3104 3105 #if defined(PETSC_USE_INFO) 3106 if (ai[am] != 0) { 3107 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3108 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);CHKERRQ(ierr); 3109 ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr); 3110 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);CHKERRQ(ierr); 3111 } else { 3112 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3113 } 3114 #endif 3115 3116 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3117 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3118 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3119 3120 /* destroy list of free space and other temporary array(s) */ 3121 ierr = PetscMalloc1((ui[am]+1),&uj);CHKERRQ(ierr); 3122 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3123 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3124 3125 /* put together the new matrix in MATSEQSBAIJ format */ 3126 3127 b = (Mat_SeqSBAIJ*)fact->data; 3128 b->singlemalloc = PETSC_FALSE; 3129 b->free_a = PETSC_TRUE; 3130 b->free_ij = PETSC_TRUE; 3131 3132 ierr = PetscMalloc1((ui[am]+1),&b->a);CHKERRQ(ierr); 3133 3134 b->j = uj; 3135 b->i = ui; 3136 b->diag = 0; 3137 b->ilen = 0; 3138 b->imax = 0; 3139 b->row = perm; 3140 b->col = perm; 3141 3142 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3143 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3144 3145 b->icol = iperm; 3146 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3147 3148 ierr = PetscMalloc1((am+1),&b->solve_work);CHKERRQ(ierr); 3149 ierr = PetscLogObjectMemory((PetscObject)fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3150 b->maxnz = b->nz = ui[am]; 3151 3152 fact->info.factor_mallocs = reallocs; 3153 fact->info.fill_ratio_given = fill; 3154 if (ai[am] != 0) { 3155 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3156 } else { 3157 fact->info.fill_ratio_needed = 0.0; 3158 } 3159 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3160 PetscFunctionReturn(0); 3161 } 3162 3163 #undef __FUNCT__ 3164 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3165 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3166 { 3167 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3168 PetscErrorCode ierr; 3169 PetscInt n = A->rmap->n; 3170 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3171 PetscScalar *x,sum; 3172 const PetscScalar *b; 3173 const MatScalar *aa = a->a,*v; 3174 PetscInt i,nz; 3175 3176 PetscFunctionBegin; 3177 if (!n) PetscFunctionReturn(0); 3178 3179 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3180 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3181 3182 /* forward solve the lower triangular */ 3183 x[0] = b[0]; 3184 v = aa; 3185 vi = aj; 3186 for (i=1; i<n; i++) { 3187 nz = ai[i+1] - ai[i]; 3188 sum = b[i]; 3189 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3190 v += nz; 3191 vi += nz; 3192 x[i] = sum; 3193 } 3194 3195 /* backward solve the upper triangular */ 3196 for (i=n-1; i>=0; i--) { 3197 v = aa + adiag[i+1] + 1; 3198 vi = aj + adiag[i+1] + 1; 3199 nz = adiag[i] - adiag[i+1]-1; 3200 sum = x[i]; 3201 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3202 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3203 } 3204 3205 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3206 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3207 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3208 PetscFunctionReturn(0); 3209 } 3210 3211 #undef __FUNCT__ 3212 #define __FUNCT__ "MatSolve_SeqAIJ" 3213 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3214 { 3215 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3216 IS iscol = a->col,isrow = a->row; 3217 PetscErrorCode ierr; 3218 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3219 const PetscInt *rout,*cout,*r,*c; 3220 PetscScalar *x,*tmp,sum; 3221 const PetscScalar *b; 3222 const MatScalar *aa = a->a,*v; 3223 3224 PetscFunctionBegin; 3225 if (!n) PetscFunctionReturn(0); 3226 3227 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3229 tmp = a->solve_work; 3230 3231 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3232 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3233 3234 /* forward solve the lower triangular */ 3235 tmp[0] = b[r[0]]; 3236 v = aa; 3237 vi = aj; 3238 for (i=1; i<n; i++) { 3239 nz = ai[i+1] - ai[i]; 3240 sum = b[r[i]]; 3241 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3242 tmp[i] = sum; 3243 v += nz; vi += nz; 3244 } 3245 3246 /* backward solve the upper triangular */ 3247 for (i=n-1; i>=0; i--) { 3248 v = aa + adiag[i+1]+1; 3249 vi = aj + adiag[i+1]+1; 3250 nz = adiag[i]-adiag[i+1]-1; 3251 sum = tmp[i]; 3252 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3253 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3254 } 3255 3256 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3257 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3258 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3259 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3260 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3261 PetscFunctionReturn(0); 3262 } 3263 3264 #undef __FUNCT__ 3265 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3266 /* 3267 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3268 */ 3269 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3270 { 3271 Mat B = *fact; 3272 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3273 IS isicol; 3274 PetscErrorCode ierr; 3275 const PetscInt *r,*ic; 3276 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3277 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3278 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3279 PetscInt nlnk,*lnk; 3280 PetscBT lnkbt; 3281 PetscBool row_identity,icol_identity; 3282 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3283 const PetscInt *ics; 3284 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3285 PetscReal dt =info->dt,shift=info->shiftamount; 3286 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3287 PetscBool missing; 3288 3289 PetscFunctionBegin; 3290 if (dt == PETSC_DEFAULT) dt = 0.005; 3291 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3292 3293 /* ------- symbolic factorization, can be reused ---------*/ 3294 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3295 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3296 adiag=a->diag; 3297 3298 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3299 3300 /* bdiag is location of diagonal in factor */ 3301 ierr = PetscMalloc1((n+1),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3302 ierr = PetscMalloc1((n+1),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3303 3304 /* allocate row pointers bi */ 3305 ierr = PetscMalloc1((2*n+2),&bi);CHKERRQ(ierr); 3306 3307 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3308 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3309 nnz_max = ai[n]+2*n*dtcount+2; 3310 3311 ierr = PetscMalloc1((nnz_max+1),&bj);CHKERRQ(ierr); 3312 ierr = PetscMalloc1((nnz_max+1),&ba);CHKERRQ(ierr); 3313 3314 /* put together the new matrix */ 3315 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr); 3316 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr); 3317 b = (Mat_SeqAIJ*)B->data; 3318 3319 b->free_a = PETSC_TRUE; 3320 b->free_ij = PETSC_TRUE; 3321 b->singlemalloc = PETSC_FALSE; 3322 3323 b->a = ba; 3324 b->j = bj; 3325 b->i = bi; 3326 b->diag = bdiag; 3327 b->ilen = 0; 3328 b->imax = 0; 3329 b->row = isrow; 3330 b->col = iscol; 3331 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3332 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3333 b->icol = isicol; 3334 3335 ierr = PetscMalloc1((n+1),&b->solve_work);CHKERRQ(ierr); 3336 ierr = PetscLogObjectMemory((PetscObject)B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3337 b->maxnz = nnz_max; 3338 3339 B->factortype = MAT_FACTOR_ILUDT; 3340 B->info.factor_mallocs = 0; 3341 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3342 /* ------- end of symbolic factorization ---------*/ 3343 3344 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3345 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3346 ics = ic; 3347 3348 /* linked list for storing column indices of the active row */ 3349 nlnk = n + 1; 3350 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3351 3352 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3353 ierr = PetscMalloc2(n,&im,n,&jtmp);CHKERRQ(ierr); 3354 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3355 ierr = PetscMalloc2(n,&rtmp,n,&vtmp);CHKERRQ(ierr); 3356 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3357 3358 bi[0] = 0; 3359 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3360 bdiag_rev[n] = bdiag[0]; 3361 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3362 for (i=0; i<n; i++) { 3363 /* copy initial fill into linked list */ 3364 nzi = ai[r[i]+1] - ai[r[i]]; 3365 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3366 nzi_al = adiag[r[i]] - ai[r[i]]; 3367 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3368 ajtmp = aj + ai[r[i]]; 3369 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3370 3371 /* load in initial (unfactored row) */ 3372 aatmp = a->a + ai[r[i]]; 3373 for (j=0; j<nzi; j++) { 3374 rtmp[ics[*ajtmp++]] = *aatmp++; 3375 } 3376 3377 /* add pivot rows into linked list */ 3378 row = lnk[n]; 3379 while (row < i) { 3380 nzi_bl = bi[row+1] - bi[row] + 1; 3381 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3382 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3383 nzi += nlnk; 3384 row = lnk[row]; 3385 } 3386 3387 /* copy data from lnk into jtmp, then initialize lnk */ 3388 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3389 3390 /* numerical factorization */ 3391 bjtmp = jtmp; 3392 row = *bjtmp++; /* 1st pivot row */ 3393 while (row < i) { 3394 pc = rtmp + row; 3395 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3396 multiplier = (*pc) * (*pv); 3397 *pc = multiplier; 3398 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3399 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3400 pv = ba + bdiag[row+1] + 1; 3401 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3402 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3403 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3404 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3405 } 3406 row = *bjtmp++; 3407 } 3408 3409 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3410 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3411 nzi_bl = 0; j = 0; 3412 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3413 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3414 nzi_bl++; j++; 3415 } 3416 nzi_bu = nzi - nzi_bl -1; 3417 while (j < nzi) { 3418 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3419 j++; 3420 } 3421 3422 bjtmp = bj + bi[i]; 3423 batmp = ba + bi[i]; 3424 /* apply level dropping rule to L part */ 3425 ncut = nzi_al + dtcount; 3426 if (ncut < nzi_bl) { 3427 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3428 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3429 } else { 3430 ncut = nzi_bl; 3431 } 3432 for (j=0; j<ncut; j++) { 3433 bjtmp[j] = jtmp[j]; 3434 batmp[j] = vtmp[j]; 3435 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3436 } 3437 bi[i+1] = bi[i] + ncut; 3438 nzi = ncut + 1; 3439 3440 /* apply level dropping rule to U part */ 3441 ncut = nzi_au + dtcount; 3442 if (ncut < nzi_bu) { 3443 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3444 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3445 } else { 3446 ncut = nzi_bu; 3447 } 3448 nzi += ncut; 3449 3450 /* mark bdiagonal */ 3451 bdiag[i+1] = bdiag[i] - (ncut + 1); 3452 bdiag_rev[n-i-1] = bdiag[i+1]; 3453 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3454 bjtmp = bj + bdiag[i]; 3455 batmp = ba + bdiag[i]; 3456 *bjtmp = i; 3457 *batmp = diag_tmp; /* rtmp[i]; */ 3458 if (*batmp == 0.0) { 3459 *batmp = dt+shift; 3460 /* printf(" row %d add shift %g\n",i,shift); */ 3461 } 3462 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3463 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3464 3465 bjtmp = bj + bdiag[i+1]+1; 3466 batmp = ba + bdiag[i+1]+1; 3467 for (k=0; k<ncut; k++) { 3468 bjtmp[k] = jtmp[nzi_bl+1+k]; 3469 batmp[k] = vtmp[nzi_bl+1+k]; 3470 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3471 } 3472 /* printf("\n"); */ 3473 3474 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3475 /* 3476 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3477 printf(" ----------------------------\n"); 3478 */ 3479 } /* for (i=0; i<n; i++) */ 3480 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3481 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3482 3483 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3484 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3485 3486 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3487 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3488 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3489 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3490 3491 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3492 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3493 3494 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3495 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3496 if (row_identity && icol_identity) { 3497 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3498 } else { 3499 B->ops->solve = MatSolve_SeqAIJ; 3500 } 3501 3502 B->ops->solveadd = 0; 3503 B->ops->solvetranspose = 0; 3504 B->ops->solvetransposeadd = 0; 3505 B->ops->matsolve = 0; 3506 B->assembled = PETSC_TRUE; 3507 B->preallocated = PETSC_TRUE; 3508 PetscFunctionReturn(0); 3509 } 3510 3511 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3512 #undef __FUNCT__ 3513 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3514 /* 3515 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3516 */ 3517 3518 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3519 { 3520 PetscErrorCode ierr; 3521 3522 PetscFunctionBegin; 3523 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3524 PetscFunctionReturn(0); 3525 } 3526 3527 /* 3528 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3529 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3530 */ 3531 #undef __FUNCT__ 3532 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3533 /* 3534 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3535 */ 3536 3537 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3538 { 3539 Mat C =fact; 3540 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)C->data; 3541 IS isrow = b->row,isicol = b->icol; 3542 PetscErrorCode ierr; 3543 const PetscInt *r,*ic,*ics; 3544 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3545 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3546 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3547 PetscReal dt=info->dt,shift=info->shiftamount; 3548 PetscBool row_identity, col_identity; 3549 3550 PetscFunctionBegin; 3551 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3552 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3553 ierr = PetscMalloc1((n+1),&rtmp);CHKERRQ(ierr); 3554 ics = ic; 3555 3556 for (i=0; i<n; i++) { 3557 /* initialize rtmp array */ 3558 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3559 bjtmp = bj + bi[i]; 3560 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3561 rtmp[i] = 0.0; 3562 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3563 bjtmp = bj + bdiag[i+1] + 1; 3564 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3565 3566 /* load in initial unfactored row of A */ 3567 /* printf("row %d\n",i); */ 3568 nz = ai[r[i]+1] - ai[r[i]]; 3569 ajtmp = aj + ai[r[i]]; 3570 v = aa + ai[r[i]]; 3571 for (j=0; j<nz; j++) { 3572 rtmp[ics[*ajtmp++]] = v[j]; 3573 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3574 } 3575 /* printf("\n"); */ 3576 3577 /* numerical factorization */ 3578 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3579 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3580 k = 0; 3581 while (k < nzl) { 3582 row = *bjtmp++; 3583 /* printf(" prow %d\n",row); */ 3584 pc = rtmp + row; 3585 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3586 multiplier = (*pc) * (*pv); 3587 *pc = multiplier; 3588 if (PetscAbsScalar(multiplier) > dt) { 3589 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3590 pv = b->a + bdiag[row+1] + 1; 3591 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3592 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3593 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3594 } 3595 k++; 3596 } 3597 3598 /* finished row so stick it into b->a */ 3599 /* L-part */ 3600 pv = b->a + bi[i]; 3601 pj = bj + bi[i]; 3602 nzl = bi[i+1] - bi[i]; 3603 for (j=0; j<nzl; j++) { 3604 pv[j] = rtmp[pj[j]]; 3605 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3606 } 3607 3608 /* diagonal: invert diagonal entries for simplier triangular solves */ 3609 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3610 b->a[bdiag[i]] = 1.0/rtmp[i]; 3611 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3612 3613 /* U-part */ 3614 pv = b->a + bdiag[i+1] + 1; 3615 pj = bj + bdiag[i+1] + 1; 3616 nzu = bdiag[i] - bdiag[i+1] - 1; 3617 for (j=0; j<nzu; j++) { 3618 pv[j] = rtmp[pj[j]]; 3619 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3620 } 3621 /* printf("\n"); */ 3622 } 3623 3624 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3625 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3626 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3627 3628 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3629 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3630 if (row_identity && col_identity) { 3631 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3632 } else { 3633 C->ops->solve = MatSolve_SeqAIJ; 3634 } 3635 C->ops->solveadd = 0; 3636 C->ops->solvetranspose = 0; 3637 C->ops->solvetransposeadd = 0; 3638 C->ops->matsolve = 0; 3639 C->assembled = PETSC_TRUE; 3640 C->preallocated = PETSC_TRUE; 3641 3642 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3643 PetscFunctionReturn(0); 3644 } 3645