1 #define PETSCMAT_DLL 2 3 4 #include "../src/mat/impls/aij/seq/aij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 EXTERN_C_BEGIN 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 12 /* 13 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 14 */ 15 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscTruth *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 122 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 123 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 124 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 125 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 126 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 127 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 128 (*B)->factor = ftype; 129 PetscFunctionReturn(0); 130 } 131 EXTERN_C_END 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 136 { 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 138 IS isicol; 139 PetscErrorCode ierr; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 147 PetscBT lnkbt; 148 PetscTruth newdatastruct= PETSC_FALSE; 149 150 PetscFunctionBegin; 151 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 152 if(newdatastruct){ 153 ierr = MatLUFactorSymbolic_SeqAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 158 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 159 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161 162 /* get new row pointers */ 163 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 164 bi[0] = 0; 165 166 /* bdiag is location of diagonal in factor */ 167 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 168 bdiag[0] = 0; 169 170 /* linked list for storing column indices of the active row */ 171 nlnk = n + 1; 172 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 174 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 175 176 /* initial FreeSpace size is f*(ai[n]+1) */ 177 f = info->fill; 178 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 179 current_space = free_space; 180 181 for (i=0; i<n; i++) { 182 /* copy previous fill into linked list */ 183 nzi = 0; 184 nnz = ai[r[i]+1] - ai[r[i]]; 185 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 186 ajtmp = aj + ai[r[i]]; 187 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 188 nzi += nlnk; 189 190 /* add pivot rows into linked list */ 191 row = lnk[n]; 192 while (row < i) { 193 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 196 nzi += nlnk; 197 row = lnk[row]; 198 } 199 bi[i+1] = bi[i] + nzi; 200 im[i] = nzi; 201 202 /* mark bdiag */ 203 nzbd = 0; 204 nnz = nzi; 205 k = lnk[n]; 206 while (nnz-- && k < i){ 207 nzbd++; 208 k = lnk[k]; 209 } 210 bdiag[i] = bi[i] + nzbd; 211 212 /* if free space is not available, make more free space */ 213 if (current_space->local_remaining<nzi) { 214 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 215 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 216 reallocs++; 217 } 218 219 /* copy data into free space, then initialize lnk */ 220 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 221 bi_ptr[i] = current_space->array; 222 current_space->array += nzi; 223 current_space->local_used += nzi; 224 current_space->local_remaining -= nzi; 225 } 226 #if defined(PETSC_USE_INFO) 227 if (ai[n] != 0) { 228 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 229 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 230 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 231 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 232 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 233 } else { 234 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 235 } 236 #endif 237 238 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 239 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 240 241 /* destroy list of free space and other temporary array(s) */ 242 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 243 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 244 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 245 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 246 247 /* put together the new matrix */ 248 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 249 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 250 b = (Mat_SeqAIJ*)(B)->data; 251 b->free_a = PETSC_TRUE; 252 b->free_ij = PETSC_TRUE; 253 b->singlemalloc = PETSC_FALSE; 254 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 255 b->j = bj; 256 b->i = bi; 257 b->diag = bdiag; 258 b->ilen = 0; 259 b->imax = 0; 260 b->row = isrow; 261 b->col = iscol; 262 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 263 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 264 b->icol = isicol; 265 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 266 267 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 269 b->maxnz = b->nz = bi[n] ; 270 271 (B)->factor = MAT_FACTOR_LU; 272 (B)->info.factor_mallocs = reallocs; 273 (B)->info.fill_ratio_given = f; 274 275 if (ai[n]) { 276 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277 } else { 278 (B)->info.fill_ratio_needed = 0.0; 279 } 280 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 281 (B)->ops->solve = MatSolve_SeqAIJ; 282 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 283 /* switch to inodes if appropriate */ 284 ierr = MatLUFactorSymbolic_SeqAIJ_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_newdatastruct" 290 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 291 { 292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 293 IS isicol; 294 PetscErrorCode ierr; 295 const PetscInt *r,*ic; 296 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 297 PetscInt *bi,*bj,*ajtmp; 298 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 299 PetscReal f; 300 PetscInt nlnk,*lnk,k,**bi_ptr; 301 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 302 PetscBT lnkbt; 303 304 PetscFunctionBegin; 305 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 306 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 307 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 308 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 309 310 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 311 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 312 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 313 bi[0] = bdiag[0] = 0; 314 315 /* linked list for storing column indices of the active row */ 316 nlnk = n + 1; 317 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 318 319 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 320 321 /* initial FreeSpace size is f*(ai[n]+1) */ 322 f = info->fill; 323 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 324 current_space = free_space; 325 326 for (i=0; i<n; i++) { 327 /* copy previous fill into linked list */ 328 nzi = 0; 329 nnz = ai[r[i]+1] - ai[r[i]]; 330 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 331 ajtmp = aj + ai[r[i]]; 332 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 333 nzi += nlnk; 334 335 /* add pivot rows into linked list */ 336 row = lnk[n]; 337 while (row < i){ 338 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 339 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 340 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 341 nzi += nlnk; 342 row = lnk[row]; 343 } 344 bi[i+1] = bi[i] + nzi; 345 im[i] = nzi; 346 347 /* mark bdiag */ 348 nzbd = 0; 349 nnz = nzi; 350 k = lnk[n]; 351 while (nnz-- && k < i){ 352 nzbd++; 353 k = lnk[k]; 354 } 355 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 356 357 /* if free space is not available, make more free space */ 358 if (current_space->local_remaining<nzi) { 359 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 360 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 361 reallocs++; 362 } 363 364 /* copy data into free space, then initialize lnk */ 365 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 366 bi_ptr[i] = current_space->array; 367 current_space->array += nzi; 368 current_space->local_used += nzi; 369 current_space->local_remaining -= nzi; 370 } 371 #if defined(PETSC_USE_INFO) 372 if (ai[n] != 0) { 373 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 374 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 375 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 376 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 377 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 378 } else { 379 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 380 } 381 #endif 382 383 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 385 386 /* destroy list of free space and other temporary array(s) */ 387 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 388 ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 389 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 390 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 391 392 /* put together the new matrix */ 393 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 394 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 395 b = (Mat_SeqAIJ*)(B)->data; 396 b->free_a = PETSC_TRUE; 397 b->free_ij = PETSC_TRUE; 398 b->singlemalloc = PETSC_FALSE; 399 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 400 b->j = bj; 401 b->i = bi; 402 b->diag = bdiag; 403 b->ilen = 0; 404 b->imax = 0; 405 b->row = isrow; 406 b->col = iscol; 407 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 408 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 409 b->icol = isicol; 410 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 411 412 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 413 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 414 b->maxnz = b->nz = bdiag[0]+1; 415 B->factor = MAT_FACTOR_LU; 416 B->info.factor_mallocs = reallocs; 417 B->info.fill_ratio_given = f; 418 419 if (ai[n]) { 420 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 421 } else { 422 B->info.fill_ratio_needed = 0.0; 423 } 424 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 425 /* switch to inodes if appropriate */ 426 ierr = Mat_CheckInode_FactorLU(B,PETSC_FALSE);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 /* 431 Trouble in factorization, should we dump the original matrix? 432 */ 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatFactorDumpMatrix" 435 PetscErrorCode MatFactorDumpMatrix(Mat A) 436 { 437 PetscErrorCode ierr; 438 PetscTruth flg = PETSC_FALSE; 439 440 PetscFunctionBegin; 441 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 442 if (flg) { 443 PetscViewer viewer; 444 char filename[PETSC_MAX_PATH_LEN]; 445 446 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 447 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 448 ierr = MatView(A,viewer);CHKERRQ(ierr); 449 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 450 } 451 PetscFunctionReturn(0); 452 } 453 454 #undef __FUNCT__ 455 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct" 456 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 457 { 458 Mat C=B; 459 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 460 IS isrow = b->row,isicol = b->icol; 461 PetscErrorCode ierr; 462 const PetscInt *r,*ic,*ics; 463 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 464 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 465 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 466 PetscTruth row_identity,col_identity; 467 468 LUShift_Ctx sctx; 469 PetscInt *ddiag,newshift; 470 PetscReal rs; 471 MatScalar d; 472 473 PetscFunctionBegin; 474 /* MatPivotSetUp(): initialize shift context sctx */ 475 ierr = PetscMemzero(&sctx,sizeof(LUShift_Ctx));CHKERRQ(ierr); 476 477 /* if both shift schemes are chosen by user, only use info->shiftpd */ 478 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 479 ddiag = a->diag; 480 sctx.shift_top = info->zeropivot; 481 for (i=0; i<n; i++) { 482 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 483 d = (aa)[ddiag[i]]; 484 rs = -PetscAbsScalar(d) - PetscRealPart(d); 485 v = aa+ai[i]; 486 nz = ai[i+1] - ai[i]; 487 for (j=0; j<nz; j++) 488 rs += PetscAbsScalar(v[j]); 489 if (rs>sctx.shift_top) sctx.shift_top = rs; 490 } 491 sctx.shift_top *= 1.1; 492 sctx.nshift_max = 5; 493 sctx.shift_lo = 0.; 494 sctx.shift_hi = 1.; 495 } 496 497 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 498 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 499 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 500 ics = ic; 501 502 do { 503 sctx.lushift = PETSC_FALSE; 504 for (i=0; i<n; i++){ 505 /* zero rtmp */ 506 /* L part */ 507 nz = bi[i+1] - bi[i]; 508 bjtmp = bj + bi[i]; 509 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 510 511 /* U part */ 512 nz = bdiag[i]-bdiag[i+1]; 513 bjtmp = bj + bdiag[i+1]+1; 514 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 515 516 /* load in initial (unfactored row) */ 517 nz = ai[r[i]+1] - ai[r[i]]; 518 ajtmp = aj + ai[r[i]]; 519 v = aa + ai[r[i]]; 520 for (j=0; j<nz; j++) { 521 rtmp[ics[ajtmp[j]]] = v[j]; 522 } 523 /* ZeropivotApply() */ 524 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 525 526 /* elimination */ 527 bjtmp = bj + bi[i]; 528 row = *bjtmp++; 529 nzL = bi[i+1] - bi[i]; 530 for(k=0; k < nzL;k++) { 531 pc = rtmp + row; 532 if (*pc != 0.0) { 533 pv = b->a + bdiag[row]; 534 multiplier = *pc * (*pv); 535 *pc = multiplier; 536 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 537 pv = b->a + bdiag[row+1]+1; 538 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 539 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 540 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 541 } 542 row = *bjtmp++; 543 } 544 545 /* finished row so stick it into b->a */ 546 rs = 0.0; 547 /* L part */ 548 pv = b->a + bi[i] ; 549 pj = b->j + bi[i] ; 550 nz = bi[i+1] - bi[i]; 551 for (j=0; j<nz; j++) { 552 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 553 } 554 555 /* U part */ 556 pv = b->a + bdiag[i+1]+1; 557 pj = b->j + bdiag[i+1]+1; 558 nz = bdiag[i] - bdiag[i+1]-1; 559 for (j=0; j<nz; j++) { 560 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 561 } 562 563 /* MatPivotCheck() */ 564 sctx.rs = rs; 565 sctx.pv = rtmp[i]; 566 if (info->shiftnz){ 567 ierr = MatPivotCheck_nz(info,sctx,i,newshift);CHKERRQ(ierr); 568 } else if (info->shiftpd){ 569 ierr = MatPivotCheck_pd(info,sctx,i,newshift);CHKERRQ(ierr); 570 } else if (info->shiftinblocks){ 571 ierr = MatPivotCheck_inblocks(info,sctx,i,newshift);CHKERRQ(ierr); 572 } else { 573 ierr = MatPivotCheck_none(info,sctx,i,newshift);CHKERRQ(ierr); 574 } 575 rtmp[i] = sctx.pv; 576 if (newshift == 1) break; 577 578 /* Mark diagonal and invert diagonal for simplier triangular solves */ 579 pv = b->a + bdiag[i]; 580 *pv = 1.0/rtmp[i]; 581 582 } /* endof for (i=0; i<n; i++){ */ 583 584 /* MatPivotRefine() */ 585 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 586 /* 587 * if no shift in this attempt & shifting & started shifting & can refine, 588 * then try lower shift 589 */ 590 sctx.shift_hi = sctx.shift_fraction; 591 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 592 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 593 sctx.lushift = PETSC_TRUE; 594 sctx.nshift++; 595 } 596 } while (sctx.lushift); 597 598 ierr = PetscFree(rtmp);CHKERRQ(ierr); 599 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 600 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 601 if (b->inode.use){ 602 C->ops->solve = MatSolve_SeqAIJ_Inode_newdatastruct; 603 } else { 604 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 605 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 606 if (row_identity && col_identity) { 607 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 608 } else { 609 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 610 } 611 } 612 C->ops->solveadd = MatSolveAdd_SeqAIJ_newdatastruct; 613 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_newdatastruct; 614 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_newdatastruct; 615 C->ops->matsolve = MatMatSolve_SeqAIJ_newdatastruct; 616 C->assembled = PETSC_TRUE; 617 C->preallocated = PETSC_TRUE; 618 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 619 620 /* MatPivotView() */ 621 if (sctx.nshift){ 622 if (info->shiftpd) { 623 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 624 } else if (info->shiftnz) { 625 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 626 } else if (info->shiftinblocks){ 627 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftinblocks);CHKERRQ(ierr); 628 } 629 } 630 PetscFunctionReturn(0); 631 } 632 633 #undef __FUNCT__ 634 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 635 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 636 { 637 Mat C=B; 638 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 639 IS isrow = b->row,isicol = b->icol; 640 PetscErrorCode ierr; 641 const PetscInt *r,*ic,*ics; 642 PetscInt nz,row,i,j,n=A->rmap->n,diag; 643 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 644 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 645 MatScalar *pv,*rtmp,*pc,multiplier,d; 646 const MatScalar *v,*aa=a->a; 647 PetscReal rs=0.0; 648 LUShift_Ctx sctx; 649 PetscInt newshift,*ddiag; 650 651 PetscFunctionBegin; 652 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 653 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 654 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 655 ics = ic; 656 657 /* initialize shift context sctx */ 658 sctx.nshift = 0; 659 sctx.nshift_max = 0; 660 sctx.shift_top = 0.0; 661 sctx.shift_lo = 0.0; 662 sctx.shift_hi = 0.0; 663 sctx.shift_fraction = 0.0; 664 sctx.shift_amount = 0.0; 665 666 /* if both shift schemes are chosen by user, only use info->shiftpd */ 667 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 668 ddiag = a->diag; 669 sctx.shift_top = info->zeropivot; 670 for (i=0; i<n; i++) { 671 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 672 d = (aa)[ddiag[i]]; 673 rs = -PetscAbsScalar(d) - PetscRealPart(d); 674 v = aa+ai[i]; 675 nz = ai[i+1] - ai[i]; 676 for (j=0; j<nz; j++) 677 rs += PetscAbsScalar(v[j]); 678 if (rs>sctx.shift_top) sctx.shift_top = rs; 679 } 680 sctx.shift_top *= 1.1; 681 sctx.nshift_max = 5; 682 sctx.shift_lo = 0.; 683 sctx.shift_hi = 1.; 684 } 685 686 do { 687 sctx.lushift = PETSC_FALSE; 688 for (i=0; i<n; i++){ 689 nz = bi[i+1] - bi[i]; 690 bjtmp = bj + bi[i]; 691 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 692 693 /* load in initial (unfactored row) */ 694 nz = ai[r[i]+1] - ai[r[i]]; 695 ajtmp = aj + ai[r[i]]; 696 v = aa + ai[r[i]]; 697 for (j=0; j<nz; j++) { 698 rtmp[ics[ajtmp[j]]] = v[j]; 699 } 700 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 701 /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */ 702 703 row = *bjtmp++; 704 while (row < i) { 705 pc = rtmp + row; 706 if (*pc != 0.0) { 707 pv = b->a + diag_offset[row]; 708 pj = b->j + diag_offset[row] + 1; 709 multiplier = *pc / *pv++; 710 *pc = multiplier; 711 nz = bi[row+1] - diag_offset[row] - 1; 712 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 713 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 714 } 715 row = *bjtmp++; 716 } 717 /* finished row so stick it into b->a */ 718 pv = b->a + bi[i] ; 719 pj = b->j + bi[i] ; 720 nz = bi[i+1] - bi[i]; 721 diag = diag_offset[i] - bi[i]; 722 rs = 0.0; 723 for (j=0; j<nz; j++) { 724 pv[j] = rtmp[pj[j]]; 725 rs += PetscAbsScalar(pv[j]); 726 } 727 rs -= PetscAbsScalar(pv[diag]); 728 729 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 730 sctx.rs = rs; 731 sctx.pv = pv[diag]; 732 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 733 if (newshift == 1) break; 734 } 735 736 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 737 /* 738 * if no shift in this attempt & shifting & started shifting & can refine, 739 * then try lower shift 740 */ 741 sctx.shift_hi = sctx.shift_fraction; 742 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 743 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 744 sctx.lushift = PETSC_TRUE; 745 sctx.nshift++; 746 } 747 } while (sctx.lushift); 748 749 /* invert diagonal entries for simplier triangular solves */ 750 for (i=0; i<n; i++) { 751 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 752 } 753 ierr = PetscFree(rtmp);CHKERRQ(ierr); 754 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 755 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 756 if (b->inode.use) { 757 C->ops->solve = MatSolve_SeqAIJ_Inode; 758 } else { 759 PetscTruth row_identity, col_identity; 760 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 761 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 762 if (row_identity && col_identity) { 763 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 764 } else { 765 C->ops->solve = MatSolve_SeqAIJ; 766 } 767 } 768 C->ops->solveadd = MatSolveAdd_SeqAIJ; 769 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 770 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 771 C->ops->matsolve = MatMatSolve_SeqAIJ; 772 C->assembled = PETSC_TRUE; 773 C->preallocated = PETSC_TRUE; 774 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 775 if (sctx.nshift){ 776 if (info->shiftpd) { 777 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 778 } else if (info->shiftnz) { 779 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 780 } 781 } 782 PetscFunctionReturn(0); 783 } 784 785 /* 786 This routine implements inplace ILU(0) with row or/and column permutations. 787 Input: 788 A - original matrix 789 Output; 790 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 791 a->j (col index) is permuted by the inverse of colperm, then sorted 792 a->a reordered accordingly with a->j 793 a->diag (ptr to diagonal elements) is updated. 794 */ 795 #undef __FUNCT__ 796 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 797 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 798 { 799 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 800 IS isrow = a->row,isicol = a->icol; 801 PetscErrorCode ierr; 802 const PetscInt *r,*ic,*ics; 803 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 804 PetscInt *ajtmp,nz,row; 805 PetscInt *diag = a->diag,nbdiag,*pj; 806 PetscScalar *rtmp,*pc,multiplier,d; 807 MatScalar *v,*pv; 808 PetscReal rs; 809 LUShift_Ctx sctx; 810 PetscInt newshift; 811 812 PetscFunctionBegin; 813 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 814 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 815 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 816 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 817 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 818 ics = ic; 819 820 sctx.shift_top = 0.; 821 sctx.nshift_max = 0; 822 sctx.shift_lo = 0.; 823 sctx.shift_hi = 0.; 824 sctx.shift_fraction = 0.; 825 826 /* if both shift schemes are chosen by user, only use info->shiftpd */ 827 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 828 sctx.shift_top = 0.; 829 for (i=0; i<n; i++) { 830 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 831 d = (a->a)[diag[i]]; 832 rs = -PetscAbsScalar(d) - PetscRealPart(d); 833 v = a->a+ai[i]; 834 nz = ai[i+1] - ai[i]; 835 for (j=0; j<nz; j++) 836 rs += PetscAbsScalar(v[j]); 837 if (rs>sctx.shift_top) sctx.shift_top = rs; 838 } 839 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 840 sctx.shift_top *= 1.1; 841 sctx.nshift_max = 5; 842 sctx.shift_lo = 0.; 843 sctx.shift_hi = 1.; 844 } 845 846 sctx.shift_amount = 0.; 847 sctx.nshift = 0; 848 do { 849 sctx.lushift = PETSC_FALSE; 850 for (i=0; i<n; i++){ 851 /* load in initial unfactored row */ 852 nz = ai[r[i]+1] - ai[r[i]]; 853 ajtmp = aj + ai[r[i]]; 854 v = a->a + ai[r[i]]; 855 /* sort permuted ajtmp and values v accordingly */ 856 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 857 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 858 859 diag[r[i]] = ai[r[i]]; 860 for (j=0; j<nz; j++) { 861 rtmp[ajtmp[j]] = v[j]; 862 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 863 } 864 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 865 866 row = *ajtmp++; 867 while (row < i) { 868 pc = rtmp + row; 869 if (*pc != 0.0) { 870 pv = a->a + diag[r[row]]; 871 pj = aj + diag[r[row]] + 1; 872 873 multiplier = *pc / *pv++; 874 *pc = multiplier; 875 nz = ai[r[row]+1] - diag[r[row]] - 1; 876 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 877 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 878 } 879 row = *ajtmp++; 880 } 881 /* finished row so overwrite it onto a->a */ 882 pv = a->a + ai[r[i]] ; 883 pj = aj + ai[r[i]] ; 884 nz = ai[r[i]+1] - ai[r[i]]; 885 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 886 887 rs = 0.0; 888 for (j=0; j<nz; j++) { 889 pv[j] = rtmp[pj[j]]; 890 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 891 } 892 893 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 894 sctx.rs = rs; 895 sctx.pv = pv[nbdiag]; 896 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 897 if (newshift == 1) break; 898 } 899 900 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 901 /* 902 * if no shift in this attempt & shifting & started shifting & can refine, 903 * then try lower shift 904 */ 905 sctx.shift_hi = sctx.shift_fraction; 906 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 907 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 908 sctx.lushift = PETSC_TRUE; 909 sctx.nshift++; 910 } 911 } while (sctx.lushift); 912 913 /* invert diagonal entries for simplier triangular solves */ 914 for (i=0; i<n; i++) { 915 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 916 } 917 918 ierr = PetscFree(rtmp);CHKERRQ(ierr); 919 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 920 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 921 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 922 A->ops->solveadd = MatSolveAdd_SeqAIJ; 923 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 924 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 925 A->assembled = PETSC_TRUE; 926 A->preallocated = PETSC_TRUE; 927 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 928 if (sctx.nshift){ 929 if (info->shiftpd) { 930 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 931 } else if (info->shiftnz) { 932 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 933 } 934 } 935 PetscFunctionReturn(0); 936 } 937 938 /* ----------------------------------------------------------- */ 939 #undef __FUNCT__ 940 #define __FUNCT__ "MatLUFactor_SeqAIJ" 941 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 942 { 943 PetscErrorCode ierr; 944 Mat C; 945 946 PetscFunctionBegin; 947 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 948 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 949 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 950 A->ops->solve = C->ops->solve; 951 A->ops->solvetranspose = C->ops->solvetranspose; 952 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 953 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 954 PetscFunctionReturn(0); 955 } 956 /* ----------------------------------------------------------- */ 957 958 959 #undef __FUNCT__ 960 #define __FUNCT__ "MatSolve_SeqAIJ" 961 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 962 { 963 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 964 IS iscol = a->col,isrow = a->row; 965 PetscErrorCode ierr; 966 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 967 PetscInt nz; 968 const PetscInt *rout,*cout,*r,*c; 969 PetscScalar *x,*tmp,*tmps,sum; 970 const PetscScalar *b; 971 const MatScalar *aa = a->a,*v; 972 973 PetscFunctionBegin; 974 if (!n) PetscFunctionReturn(0); 975 976 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 977 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 978 tmp = a->solve_work; 979 980 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 981 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 982 983 /* forward solve the lower triangular */ 984 tmp[0] = b[*r++]; 985 tmps = tmp; 986 for (i=1; i<n; i++) { 987 v = aa + ai[i] ; 988 vi = aj + ai[i] ; 989 nz = a->diag[i] - ai[i]; 990 sum = b[*r++]; 991 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 992 tmp[i] = sum; 993 } 994 995 /* backward solve the upper triangular */ 996 for (i=n-1; i>=0; i--){ 997 v = aa + a->diag[i] + 1; 998 vi = aj + a->diag[i] + 1; 999 nz = ai[i+1] - a->diag[i] - 1; 1000 sum = tmp[i]; 1001 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1002 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1003 } 1004 1005 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1006 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1007 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1008 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1009 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1010 PetscFunctionReturn(0); 1011 } 1012 1013 #undef __FUNCT__ 1014 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1015 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1016 { 1017 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1018 IS iscol = a->col,isrow = a->row; 1019 PetscErrorCode ierr; 1020 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1021 PetscInt nz,neq; 1022 const PetscInt *rout,*cout,*r,*c; 1023 PetscScalar *x,*b,*tmp,*tmps,sum; 1024 const MatScalar *aa = a->a,*v; 1025 PetscTruth bisdense,xisdense; 1026 1027 PetscFunctionBegin; 1028 if (!n) PetscFunctionReturn(0); 1029 1030 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1031 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1032 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1033 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1034 1035 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1036 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1037 1038 tmp = a->solve_work; 1039 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1040 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1041 1042 for (neq=0; neq<B->cmap->n; neq++){ 1043 /* forward solve the lower triangular */ 1044 tmp[0] = b[r[0]]; 1045 tmps = tmp; 1046 for (i=1; i<n; i++) { 1047 v = aa + ai[i] ; 1048 vi = aj + ai[i] ; 1049 nz = a->diag[i] - ai[i]; 1050 sum = b[r[i]]; 1051 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1052 tmp[i] = sum; 1053 } 1054 /* backward solve the upper triangular */ 1055 for (i=n-1; i>=0; i--){ 1056 v = aa + a->diag[i] + 1; 1057 vi = aj + a->diag[i] + 1; 1058 nz = ai[i+1] - a->diag[i] - 1; 1059 sum = tmp[i]; 1060 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1061 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1062 } 1063 1064 b += n; 1065 x += n; 1066 } 1067 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1068 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1069 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1070 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1071 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1072 PetscFunctionReturn(0); 1073 } 1074 1075 #undef __FUNCT__ 1076 #define __FUNCT__ "MatMatSolve_SeqAIJ_newdatastruct" 1077 PetscErrorCode MatMatSolve_SeqAIJ_newdatastruct(Mat A,Mat B,Mat X) 1078 { 1079 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1080 IS iscol = a->col,isrow = a->row; 1081 PetscErrorCode ierr; 1082 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1083 PetscInt nz,neq; 1084 const PetscInt *rout,*cout,*r,*c; 1085 PetscScalar *x,*b,*tmp,sum; 1086 const MatScalar *aa = a->a,*v; 1087 PetscTruth bisdense,xisdense; 1088 1089 PetscFunctionBegin; 1090 if (!n) PetscFunctionReturn(0); 1091 1092 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1093 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1094 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1095 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1096 1097 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1098 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1099 1100 tmp = a->solve_work; 1101 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1102 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1103 1104 for (neq=0; neq<B->cmap->n; neq++){ 1105 /* forward solve the lower triangular */ 1106 tmp[0] = b[r[0]]; 1107 v = aa; 1108 vi = aj; 1109 for (i=1; i<n; i++) { 1110 nz = ai[i+1] - ai[i]; 1111 sum = b[r[i]]; 1112 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1113 tmp[i] = sum; 1114 v += nz; vi += nz; 1115 } 1116 1117 /* backward solve the upper triangular */ 1118 for (i=n-1; i>=0; i--){ 1119 v = aa + adiag[i+1]+1; 1120 vi = aj + adiag[i+1]+1; 1121 nz = adiag[i]-adiag[i+1]-1; 1122 sum = tmp[i]; 1123 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1124 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1125 } 1126 1127 b += n; 1128 x += n; 1129 } 1130 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1131 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1132 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1133 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1134 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1135 PetscFunctionReturn(0); 1136 } 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1140 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1141 { 1142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1143 IS iscol = a->col,isrow = a->row; 1144 PetscErrorCode ierr; 1145 const PetscInt *r,*c,*rout,*cout; 1146 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1147 PetscInt nz,row; 1148 PetscScalar *x,*b,*tmp,*tmps,sum; 1149 const MatScalar *aa = a->a,*v; 1150 1151 PetscFunctionBegin; 1152 if (!n) PetscFunctionReturn(0); 1153 1154 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1155 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1156 tmp = a->solve_work; 1157 1158 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1159 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1160 1161 /* forward solve the lower triangular */ 1162 tmp[0] = b[*r++]; 1163 tmps = tmp; 1164 for (row=1; row<n; row++) { 1165 i = rout[row]; /* permuted row */ 1166 v = aa + ai[i] ; 1167 vi = aj + ai[i] ; 1168 nz = a->diag[i] - ai[i]; 1169 sum = b[*r++]; 1170 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1171 tmp[row] = sum; 1172 } 1173 1174 /* backward solve the upper triangular */ 1175 for (row=n-1; row>=0; row--){ 1176 i = rout[row]; /* permuted row */ 1177 v = aa + a->diag[i] + 1; 1178 vi = aj + a->diag[i] + 1; 1179 nz = ai[i+1] - a->diag[i] - 1; 1180 sum = tmp[row]; 1181 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1182 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1183 } 1184 1185 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1186 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1187 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1188 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1189 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1190 PetscFunctionReturn(0); 1191 } 1192 1193 /* ----------------------------------------------------------- */ 1194 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h" 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 1197 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 1198 { 1199 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 PetscInt n = A->rmap->n; 1202 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1203 PetscScalar *x; 1204 const PetscScalar *b; 1205 const MatScalar *aa = a->a; 1206 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1207 PetscInt adiag_i,i,nz,ai_i; 1208 const PetscInt *vi; 1209 const MatScalar *v; 1210 PetscScalar sum; 1211 #endif 1212 1213 PetscFunctionBegin; 1214 if (!n) PetscFunctionReturn(0); 1215 1216 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1217 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1218 1219 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1220 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1221 #else 1222 /* forward solve the lower triangular */ 1223 x[0] = b[0]; 1224 for (i=1; i<n; i++) { 1225 ai_i = ai[i]; 1226 v = aa + ai_i; 1227 vi = aj + ai_i; 1228 nz = adiag[i] - ai_i; 1229 sum = b[i]; 1230 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1231 x[i] = sum; 1232 } 1233 1234 /* backward solve the upper triangular */ 1235 for (i=n-1; i>=0; i--){ 1236 adiag_i = adiag[i]; 1237 v = aa + adiag_i + 1; 1238 vi = aj + adiag_i + 1; 1239 nz = ai[i+1] - adiag_i - 1; 1240 sum = x[i]; 1241 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1242 x[i] = sum*aa[adiag_i]; 1243 } 1244 #endif 1245 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1246 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1247 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1248 PetscFunctionReturn(0); 1249 } 1250 1251 #undef __FUNCT__ 1252 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1253 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1254 { 1255 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1256 IS iscol = a->col,isrow = a->row; 1257 PetscErrorCode ierr; 1258 PetscInt i, n = A->rmap->n,j; 1259 PetscInt nz; 1260 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1261 PetscScalar *x,*tmp,sum; 1262 const PetscScalar *b; 1263 const MatScalar *aa = a->a,*v; 1264 1265 PetscFunctionBegin; 1266 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1267 1268 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1269 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1270 tmp = a->solve_work; 1271 1272 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1273 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1274 1275 /* forward solve the lower triangular */ 1276 tmp[0] = b[*r++]; 1277 for (i=1; i<n; i++) { 1278 v = aa + ai[i] ; 1279 vi = aj + ai[i] ; 1280 nz = a->diag[i] - ai[i]; 1281 sum = b[*r++]; 1282 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1283 tmp[i] = sum; 1284 } 1285 1286 /* backward solve the upper triangular */ 1287 for (i=n-1; i>=0; i--){ 1288 v = aa + a->diag[i] + 1; 1289 vi = aj + a->diag[i] + 1; 1290 nz = ai[i+1] - a->diag[i] - 1; 1291 sum = tmp[i]; 1292 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1293 tmp[i] = sum*aa[a->diag[i]]; 1294 x[*c--] += tmp[i]; 1295 } 1296 1297 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1298 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1299 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1300 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1301 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1302 1303 PetscFunctionReturn(0); 1304 } 1305 1306 #undef __FUNCT__ 1307 #define __FUNCT__ "MatSolveAdd_SeqAIJ_newdatastruct" 1308 PetscErrorCode MatSolveAdd_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec yy,Vec xx) 1309 { 1310 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1311 IS iscol = a->col,isrow = a->row; 1312 PetscErrorCode ierr; 1313 PetscInt i, n = A->rmap->n,j; 1314 PetscInt nz; 1315 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1316 PetscScalar *x,*tmp,sum; 1317 const PetscScalar *b; 1318 const MatScalar *aa = a->a,*v; 1319 1320 PetscFunctionBegin; 1321 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1322 1323 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1324 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1325 tmp = a->solve_work; 1326 1327 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1328 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1329 1330 /* forward solve the lower triangular */ 1331 tmp[0] = b[r[0]]; 1332 v = aa; 1333 vi = aj; 1334 for (i=1; i<n; i++) { 1335 nz = ai[i+1] - ai[i]; 1336 sum = b[r[i]]; 1337 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1338 tmp[i] = sum; 1339 v += nz; vi += nz; 1340 } 1341 1342 /* backward solve the upper triangular */ 1343 v = aa + adiag[n-1]; 1344 vi = aj + adiag[n-1]; 1345 for (i=n-1; i>=0; i--){ 1346 nz = adiag[i] - adiag[i+1] - 1; 1347 sum = tmp[i]; 1348 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1349 tmp[i] = sum*v[nz]; 1350 x[c[i]] += tmp[i]; 1351 v += nz+1; vi += nz+1; 1352 } 1353 1354 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1355 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1356 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1357 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1358 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1359 1360 PetscFunctionReturn(0); 1361 } 1362 1363 #undef __FUNCT__ 1364 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1365 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1366 { 1367 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1368 IS iscol = a->col,isrow = a->row; 1369 PetscErrorCode ierr; 1370 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1371 PetscInt i,n = A->rmap->n,j; 1372 PetscInt nz; 1373 PetscScalar *x,*tmp,s1; 1374 const MatScalar *aa = a->a,*v; 1375 const PetscScalar *b; 1376 1377 PetscFunctionBegin; 1378 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1379 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1380 tmp = a->solve_work; 1381 1382 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1383 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1384 1385 /* copy the b into temp work space according to permutation */ 1386 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1387 1388 /* forward solve the U^T */ 1389 for (i=0; i<n; i++) { 1390 v = aa + diag[i] ; 1391 vi = aj + diag[i] + 1; 1392 nz = ai[i+1] - diag[i] - 1; 1393 s1 = tmp[i]; 1394 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1395 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1396 tmp[i] = s1; 1397 } 1398 1399 /* backward solve the L^T */ 1400 for (i=n-1; i>=0; i--){ 1401 v = aa + diag[i] - 1 ; 1402 vi = aj + diag[i] - 1 ; 1403 nz = diag[i] - ai[i]; 1404 s1 = tmp[i]; 1405 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1406 } 1407 1408 /* copy tmp into x according to permutation */ 1409 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1410 1411 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1412 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1413 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1414 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1415 1416 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1417 PetscFunctionReturn(0); 1418 } 1419 1420 #undef __FUNCT__ 1421 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_newdatastruct" 1422 PetscErrorCode MatSolveTranspose_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx) 1423 { 1424 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1425 IS iscol = a->col,isrow = a->row; 1426 PetscErrorCode ierr; 1427 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1428 PetscInt i,n = A->rmap->n,j; 1429 PetscInt nz; 1430 PetscScalar *x,*tmp,s1; 1431 const MatScalar *aa = a->a,*v; 1432 const PetscScalar *b; 1433 1434 PetscFunctionBegin; 1435 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1436 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1437 tmp = a->solve_work; 1438 1439 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1440 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1441 1442 /* copy the b into temp work space according to permutation */ 1443 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1444 1445 /* forward solve the U^T */ 1446 for (i=0; i<n; i++) { 1447 v = aa + adiag[i+1] + 1; 1448 vi = aj + adiag[i+1] + 1; 1449 nz = adiag[i] - adiag[i+1] - 1; 1450 s1 = tmp[i]; 1451 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1452 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1453 tmp[i] = s1; 1454 } 1455 1456 /* backward solve the L^T */ 1457 for (i=n-1; i>=0; i--){ 1458 v = aa + ai[i]; 1459 vi = aj + ai[i]; 1460 nz = ai[i+1] - ai[i]; 1461 s1 = tmp[i]; 1462 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1463 } 1464 1465 /* copy tmp into x according to permutation */ 1466 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1467 1468 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1469 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1470 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1471 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1472 1473 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1474 PetscFunctionReturn(0); 1475 } 1476 1477 #undef __FUNCT__ 1478 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1479 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1480 { 1481 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1482 IS iscol = a->col,isrow = a->row; 1483 PetscErrorCode ierr; 1484 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1485 PetscInt i,n = A->rmap->n,j; 1486 PetscInt nz; 1487 PetscScalar *x,*tmp,s1; 1488 const MatScalar *aa = a->a,*v; 1489 const PetscScalar *b; 1490 1491 PetscFunctionBegin; 1492 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1493 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1494 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1495 tmp = a->solve_work; 1496 1497 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1498 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1499 1500 /* copy the b into temp work space according to permutation */ 1501 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1502 1503 /* forward solve the U^T */ 1504 for (i=0; i<n; i++) { 1505 v = aa + diag[i] ; 1506 vi = aj + diag[i] + 1; 1507 nz = ai[i+1] - diag[i] - 1; 1508 s1 = tmp[i]; 1509 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1510 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1511 tmp[i] = s1; 1512 } 1513 1514 /* backward solve the L^T */ 1515 for (i=n-1; i>=0; i--){ 1516 v = aa + diag[i] - 1 ; 1517 vi = aj + diag[i] - 1 ; 1518 nz = diag[i] - ai[i]; 1519 s1 = tmp[i]; 1520 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1521 } 1522 1523 /* copy tmp into x according to permutation */ 1524 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1525 1526 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1527 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1528 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1529 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1530 1531 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 #undef __FUNCT__ 1536 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_newdatastruct" 1537 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec zz,Vec xx) 1538 { 1539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1540 IS iscol = a->col,isrow = a->row; 1541 PetscErrorCode ierr; 1542 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1543 PetscInt i,n = A->rmap->n,j; 1544 PetscInt nz; 1545 PetscScalar *x,*tmp,s1; 1546 const MatScalar *aa = a->a,*v; 1547 const PetscScalar *b; 1548 1549 PetscFunctionBegin; 1550 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1551 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1552 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1553 tmp = a->solve_work; 1554 1555 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1556 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1557 1558 /* copy the b into temp work space according to permutation */ 1559 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1560 1561 /* forward solve the U^T */ 1562 for (i=0; i<n; i++) { 1563 v = aa + adiag[i+1] + 1; 1564 vi = aj + adiag[i+1] + 1; 1565 nz = adiag[i] - adiag[i+1] - 1; 1566 s1 = tmp[i]; 1567 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1568 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1569 tmp[i] = s1; 1570 } 1571 1572 1573 /* backward solve the L^T */ 1574 for (i=n-1; i>=0; i--){ 1575 v = aa + ai[i] ; 1576 vi = aj + ai[i]; 1577 nz = ai[i+1] - ai[i]; 1578 s1 = tmp[i]; 1579 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1580 } 1581 1582 /* copy tmp into x according to permutation */ 1583 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1584 1585 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1586 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1587 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1588 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1589 1590 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1591 PetscFunctionReturn(0); 1592 } 1593 1594 /* ----------------------------------------------------------------*/ 1595 1596 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth); 1597 1598 /* 1599 ilu() under revised new data structure. 1600 Factored arrays bj and ba are stored as 1601 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1602 1603 bi=fact->i is an array of size n+1, in which 1604 bi+ 1605 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1606 bi[n]: points to L(n-1,n-1)+1 1607 1608 bdiag=fact->diag is an array of size n+1,in which 1609 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1610 bdiag[n]: points to entry of U(n-1,0)-1 1611 1612 U(i,:) contains bdiag[i] as its last entry, i.e., 1613 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1614 */ 1615 #undef __FUNCT__ 1616 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct" 1617 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1618 { 1619 1620 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1621 PetscErrorCode ierr; 1622 PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1623 PetscInt i,j,nz,*bi,*bj,*bdiag; 1624 PetscTruth missing; 1625 IS isicol; 1626 1627 PetscFunctionBegin; 1628 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1629 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1630 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1631 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1632 1633 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1634 b = (Mat_SeqAIJ*)(fact)->data; 1635 1636 /* allocate matrix arrays for new data structure */ 1637 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1638 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1639 b->singlemalloc = PETSC_TRUE; 1640 if (!b->diag){ 1641 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1642 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1643 } 1644 bdiag = b->diag; 1645 1646 if (n > 0) { 1647 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1648 } 1649 1650 /* set bi and bj with new data structure */ 1651 bi = b->i; 1652 bj = b->j; 1653 1654 /* L part */ 1655 bi[0] = 0; 1656 for (i=0; i<n; i++){ 1657 nz = adiag[i] - ai[i]; 1658 bi[i+1] = bi[i] + nz; 1659 aj = a->j + ai[i]; 1660 for (j=0; j<nz; j++){ 1661 *bj = aj[j]; bj++; 1662 } 1663 } 1664 1665 /* U part */ 1666 bdiag[n] = bi[n]-1; 1667 for (i=n-1; i>=0; i--){ 1668 nz = ai[i+1] - adiag[i] - 1; 1669 aj = a->j + adiag[i] + 1; 1670 for (j=0; j<nz; j++){ 1671 *bj = aj[j]; bj++; 1672 } 1673 /* diag[i] */ 1674 *bj = i; bj++; 1675 bdiag[i] = bdiag[i+1] + nz + 1; 1676 } 1677 1678 fact->factor = MAT_FACTOR_ILU; 1679 fact->info.factor_mallocs = 0; 1680 fact->info.fill_ratio_given = info->fill; 1681 fact->info.fill_ratio_needed = 1.0; 1682 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1683 1684 b = (Mat_SeqAIJ*)(fact)->data; 1685 b->row = isrow; 1686 b->col = iscol; 1687 b->icol = isicol; 1688 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1689 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1690 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1691 PetscFunctionReturn(0); 1692 } 1693 1694 #undef __FUNCT__ 1695 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct" 1696 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1697 { 1698 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1699 IS isicol; 1700 PetscErrorCode ierr; 1701 const PetscInt *r,*ic; 1702 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1703 PetscInt *bi,*cols,nnz,*cols_lvl; 1704 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1705 PetscInt i,levels,diagonal_fill; 1706 PetscTruth col_identity,row_identity; 1707 PetscReal f; 1708 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1709 PetscBT lnkbt; 1710 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1711 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1712 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1713 1714 PetscFunctionBegin; 1715 /* printf("MatILUFactorSymbolic_SeqAIJ_newdatastruct ...\n"); */ 1716 levels = (PetscInt)info->levels; 1717 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1718 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1719 1720 if (!levels && row_identity && col_identity) { 1721 /* special case: ilu(0) with natural ordering */ 1722 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1723 ierr = Mat_CheckInode_FactorLU(fact,PETSC_FALSE);CHKERRQ(ierr); 1724 PetscFunctionReturn(0); 1725 } 1726 1727 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1728 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1729 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1730 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1731 1732 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1733 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1734 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1735 bi[0] = bdiag[0] = 0; 1736 1737 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1738 1739 /* create a linked list for storing column indices of the active row */ 1740 nlnk = n + 1; 1741 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1742 1743 /* initial FreeSpace size is f*(ai[n]+1) */ 1744 f = info->fill; 1745 diagonal_fill = (PetscInt)info->diagonal_fill; 1746 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1747 current_space = free_space; 1748 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1749 current_space_lvl = free_space_lvl; 1750 1751 for (i=0; i<n; i++) { 1752 nzi = 0; 1753 /* copy current row into linked list */ 1754 nnz = ai[r[i]+1] - ai[r[i]]; 1755 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1756 cols = aj + ai[r[i]]; 1757 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1758 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1759 nzi += nlnk; 1760 1761 /* make sure diagonal entry is included */ 1762 if (diagonal_fill && lnk[i] == -1) { 1763 fm = n; 1764 while (lnk[fm] < i) fm = lnk[fm]; 1765 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1766 lnk[fm] = i; 1767 lnk_lvl[i] = 0; 1768 nzi++; dcount++; 1769 } 1770 1771 /* add pivot rows into the active row */ 1772 nzbd = 0; 1773 prow = lnk[n]; 1774 while (prow < i) { 1775 nnz = bdiag[prow]; 1776 cols = bj_ptr[prow] + nnz + 1; 1777 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1778 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1779 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1780 nzi += nlnk; 1781 prow = lnk[prow]; 1782 nzbd++; 1783 } 1784 bdiag[i] = nzbd; 1785 bi[i+1] = bi[i] + nzi; 1786 1787 /* if free space is not available, make more free space */ 1788 if (current_space->local_remaining<nzi) { 1789 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1790 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1791 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1792 reallocs++; 1793 } 1794 1795 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1796 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1797 bj_ptr[i] = current_space->array; 1798 bjlvl_ptr[i] = current_space_lvl->array; 1799 1800 /* make sure the active row i has diagonal entry */ 1801 if (*(bj_ptr[i]+bdiag[i]) != i) { 1802 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1803 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1804 } 1805 1806 current_space->array += nzi; 1807 current_space->local_used += nzi; 1808 current_space->local_remaining -= nzi; 1809 current_space_lvl->array += nzi; 1810 current_space_lvl->local_used += nzi; 1811 current_space_lvl->local_remaining -= nzi; 1812 } 1813 1814 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1815 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1816 1817 /* destroy list of free space and other temporary arrays */ 1818 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1819 1820 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1821 ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1822 1823 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1824 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1825 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1826 1827 #if defined(PETSC_USE_INFO) 1828 { 1829 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1830 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1831 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1832 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1833 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1834 if (diagonal_fill) { 1835 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1836 } 1837 } 1838 #endif 1839 1840 /* put together the new matrix */ 1841 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1842 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1843 b = (Mat_SeqAIJ*)(fact)->data; 1844 b->free_a = PETSC_TRUE; 1845 b->free_ij = PETSC_TRUE; 1846 b->singlemalloc = PETSC_FALSE; 1847 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1848 b->j = bj; 1849 b->i = bi; 1850 b->diag = bdiag; 1851 b->ilen = 0; 1852 b->imax = 0; 1853 b->row = isrow; 1854 b->col = iscol; 1855 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1856 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1857 b->icol = isicol; 1858 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1859 /* In b structure: Free imax, ilen, old a, old j. 1860 Allocate bdiag, solve_work, new a, new j */ 1861 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1862 b->maxnz = b->nz = bdiag[0]+1; 1863 (fact)->info.factor_mallocs = reallocs; 1864 (fact)->info.fill_ratio_given = f; 1865 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1866 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1867 ierr = Mat_CheckInode_FactorLU(fact,PETSC_FALSE);CHKERRQ(ierr); 1868 PetscFunctionReturn(0); 1869 } 1870 1871 #undef __FUNCT__ 1872 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1873 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1874 { 1875 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1876 IS isicol; 1877 PetscErrorCode ierr; 1878 const PetscInt *r,*ic; 1879 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1880 PetscInt *bi,*cols,nnz,*cols_lvl; 1881 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1882 PetscInt i,levels,diagonal_fill; 1883 PetscTruth col_identity,row_identity; 1884 PetscReal f; 1885 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1886 PetscBT lnkbt; 1887 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1888 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1889 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1890 PetscTruth missing; 1891 PetscTruth newdatastruct=PETSC_FALSE; 1892 1893 PetscFunctionBegin; 1894 ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 1895 if(newdatastruct){ 1896 ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1901 f = info->fill; 1902 levels = (PetscInt)info->levels; 1903 diagonal_fill = (PetscInt)info->diagonal_fill; 1904 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1905 1906 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1907 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1908 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1909 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1910 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1911 1912 fact->factor = MAT_FACTOR_ILU; 1913 (fact)->info.factor_mallocs = 0; 1914 (fact)->info.fill_ratio_given = info->fill; 1915 (fact)->info.fill_ratio_needed = 1.0; 1916 b = (Mat_SeqAIJ*)(fact)->data; 1917 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1918 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1919 b->row = isrow; 1920 b->col = iscol; 1921 b->icol = isicol; 1922 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1923 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1924 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1925 ierr = MatILUFactorSymbolic_SeqAIJ_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1926 PetscFunctionReturn(0); 1927 } 1928 1929 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1930 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1931 1932 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1933 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1934 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1935 bi[0] = bdiag[0] = 0; 1936 1937 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1938 1939 /* create a linked list for storing column indices of the active row */ 1940 nlnk = n + 1; 1941 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1942 1943 /* initial FreeSpace size is f*(ai[n]+1) */ 1944 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1945 current_space = free_space; 1946 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1947 current_space_lvl = free_space_lvl; 1948 1949 for (i=0; i<n; i++) { 1950 nzi = 0; 1951 /* copy current row into linked list */ 1952 nnz = ai[r[i]+1] - ai[r[i]]; 1953 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1954 cols = aj + ai[r[i]]; 1955 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1956 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1957 nzi += nlnk; 1958 1959 /* make sure diagonal entry is included */ 1960 if (diagonal_fill && lnk[i] == -1) { 1961 fm = n; 1962 while (lnk[fm] < i) fm = lnk[fm]; 1963 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1964 lnk[fm] = i; 1965 lnk_lvl[i] = 0; 1966 nzi++; dcount++; 1967 } 1968 1969 /* add pivot rows into the active row */ 1970 nzbd = 0; 1971 prow = lnk[n]; 1972 while (prow < i) { 1973 nnz = bdiag[prow]; 1974 cols = bj_ptr[prow] + nnz + 1; 1975 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1976 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1977 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1978 nzi += nlnk; 1979 prow = lnk[prow]; 1980 nzbd++; 1981 } 1982 bdiag[i] = nzbd; 1983 bi[i+1] = bi[i] + nzi; 1984 1985 /* if free space is not available, make more free space */ 1986 if (current_space->local_remaining<nzi) { 1987 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1988 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1989 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1990 reallocs++; 1991 } 1992 1993 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1994 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1995 bj_ptr[i] = current_space->array; 1996 bjlvl_ptr[i] = current_space_lvl->array; 1997 1998 /* make sure the active row i has diagonal entry */ 1999 if (*(bj_ptr[i]+bdiag[i]) != i) { 2000 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 2001 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2002 } 2003 2004 current_space->array += nzi; 2005 current_space->local_used += nzi; 2006 current_space->local_remaining -= nzi; 2007 current_space_lvl->array += nzi; 2008 current_space_lvl->local_used += nzi; 2009 current_space_lvl->local_remaining -= nzi; 2010 } 2011 2012 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2013 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2014 2015 /* destroy list of free space and other temporary arrays */ 2016 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2017 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2018 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2019 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2020 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2021 2022 #if defined(PETSC_USE_INFO) 2023 { 2024 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2025 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2026 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2027 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2028 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2029 if (diagonal_fill) { 2030 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2031 } 2032 } 2033 #endif 2034 2035 /* put together the new matrix */ 2036 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2037 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2038 b = (Mat_SeqAIJ*)(fact)->data; 2039 b->free_a = PETSC_TRUE; 2040 b->free_ij = PETSC_TRUE; 2041 b->singlemalloc = PETSC_FALSE; 2042 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2043 b->j = bj; 2044 b->i = bi; 2045 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2046 b->diag = bdiag; 2047 b->ilen = 0; 2048 b->imax = 0; 2049 b->row = isrow; 2050 b->col = iscol; 2051 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2052 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2053 b->icol = isicol; 2054 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2055 /* In b structure: Free imax, ilen, old a, old j. 2056 Allocate bdiag, solve_work, new a, new j */ 2057 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2058 b->maxnz = b->nz = bi[n] ; 2059 (fact)->info.factor_mallocs = reallocs; 2060 (fact)->info.fill_ratio_given = f; 2061 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2062 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 2063 ierr = MatILUFactorSymbolic_SeqAIJ_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2064 PetscFunctionReturn(0); 2065 } 2066 2067 #undef __FUNCT__ 2068 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_newdatastruct" 2069 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 2070 { 2071 Mat C = B; 2072 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2073 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2074 IS ip=b->row,iip = b->icol; 2075 PetscErrorCode ierr; 2076 const PetscInt *rip,*riip; 2077 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2078 PetscInt *ai=a->i,*aj=a->j; 2079 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2080 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2081 PetscTruth perm_identity; 2082 2083 LUShift_Ctx sctx; 2084 PetscInt newshift; 2085 PetscReal rs; 2086 MatScalar d,*v; 2087 2088 PetscFunctionBegin; 2089 /* MatPivotSetUp(): initialize shift context sctx */ 2090 ierr = PetscMemzero(&sctx,sizeof(LUShift_Ctx));CHKERRQ(ierr); 2091 2092 /* if both shift schemes are chosen by user, only use info->shiftpd */ 2093 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 2094 sctx.shift_top = info->zeropivot; 2095 for (i=0; i<mbs; i++) { 2096 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2097 d = (aa)[a->diag[i]]; 2098 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2099 v = aa+ai[i]; 2100 nz = ai[i+1] - ai[i]; 2101 for (j=0; j<nz; j++) 2102 rs += PetscAbsScalar(v[j]); 2103 if (rs>sctx.shift_top) sctx.shift_top = rs; 2104 } 2105 sctx.shift_top *= 1.1; 2106 sctx.nshift_max = 5; 2107 sctx.shift_lo = 0.; 2108 sctx.shift_hi = 1.; 2109 } 2110 2111 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2112 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2113 2114 /* allocate working arrays 2115 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2116 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2117 */ 2118 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2119 2120 do { 2121 sctx.lushift = PETSC_FALSE; 2122 2123 for (i=0; i<mbs; i++) c2r[i] = mbs; 2124 il[0] = 0; 2125 2126 for (k = 0; k<mbs; k++){ 2127 /* zero rtmp */ 2128 nz = bi[k+1] - bi[k]; 2129 bjtmp = bj + bi[k]; 2130 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2131 2132 /* load in initial unfactored row */ 2133 bval = ba + bi[k]; 2134 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2135 for (j = jmin; j < jmax; j++){ 2136 col = riip[aj[j]]; 2137 if (col >= k){ /* only take upper triangular entry */ 2138 rtmp[col] = aa[j]; 2139 *bval++ = 0.0; /* for in-place factorization */ 2140 } 2141 } 2142 /* shift the diagonal of the matrix: ZeropivotApply() */ 2143 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2144 2145 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2146 dk = rtmp[k]; 2147 i = c2r[k]; /* first row to be added to k_th row */ 2148 2149 while (i < k){ 2150 nexti = c2r[i]; /* next row to be added to k_th row */ 2151 2152 /* compute multiplier, update diag(k) and U(i,k) */ 2153 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2154 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2155 dk += uikdi*ba[ili]; /* update diag[k] */ 2156 ba[ili] = uikdi; /* -U(i,k) */ 2157 2158 /* add multiple of row i to k-th row */ 2159 jmin = ili + 1; jmax = bi[i+1]; 2160 if (jmin < jmax){ 2161 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2162 /* update il and c2r for row i */ 2163 il[i] = jmin; 2164 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2165 } 2166 i = nexti; 2167 } 2168 2169 /* copy data into U(k,:) */ 2170 rs = 0.0; 2171 jmin = bi[k]; jmax = bi[k+1]-1; 2172 if (jmin < jmax) { 2173 for (j=jmin; j<jmax; j++){ 2174 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2175 } 2176 /* add the k-th row into il and c2r */ 2177 il[k] = jmin; 2178 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2179 } 2180 2181 /* MatPivotCheck() */ 2182 sctx.rs = rs; 2183 sctx.pv = dk; 2184 if (info->shiftnz){ 2185 ierr = MatPivotCheck_nz(info,sctx,k,newshift);CHKERRQ(ierr); 2186 } else if (info->shiftpd){ 2187 ierr = MatPivotCheck_pd(info,sctx,k,newshift);CHKERRQ(ierr); 2188 } else if (info->shiftinblocks){ 2189 ierr = MatPivotCheck_inblocks(info,sctx,k,newshift);CHKERRQ(ierr); 2190 } else { 2191 ierr = MatPivotCheck_none(info,sctx,k,newshift);CHKERRQ(ierr); 2192 } 2193 dk = sctx.pv; 2194 if (newshift == 1) break; 2195 2196 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2197 } 2198 } while (sctx.lushift); 2199 2200 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2201 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2202 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2203 2204 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2205 if (perm_identity){ 2206 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct; 2207 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct; 2208 B->ops->forwardsolve = 0; 2209 B->ops->backwardsolve = 0; 2210 } else { 2211 B->ops->solve = MatSolve_SeqSBAIJ_1_newdatastruct; 2212 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_newdatastruct; 2213 B->ops->forwardsolve = 0; 2214 B->ops->backwardsolve = 0; 2215 } 2216 2217 C->assembled = PETSC_TRUE; 2218 C->preallocated = PETSC_TRUE; 2219 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2220 2221 /* MatPivotView() */ 2222 if (sctx.nshift){ 2223 if (info->shiftpd) { 2224 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2225 } else if (info->shiftnz) { 2226 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2227 } else if (info->shiftinblocks){ 2228 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftinblocks);CHKERRQ(ierr); 2229 } 2230 } 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2236 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2237 { 2238 Mat C = B; 2239 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2240 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2241 IS ip=b->row,iip = b->icol; 2242 PetscErrorCode ierr; 2243 const PetscInt *rip,*riip; 2244 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2245 PetscInt *ai=a->i,*aj=a->j; 2246 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2247 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2248 PetscReal zeropivot,rs,shiftnz; 2249 PetscReal shiftpd; 2250 ChShift_Ctx sctx; 2251 PetscInt newshift; 2252 PetscTruth perm_identity; 2253 2254 PetscFunctionBegin; 2255 shiftnz = info->shiftnz; 2256 shiftpd = info->shiftpd; 2257 zeropivot = info->zeropivot; 2258 2259 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2260 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2261 2262 /* initialization */ 2263 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2264 sctx.shift_amount = 0; 2265 sctx.nshift = 0; 2266 do { 2267 sctx.chshift = PETSC_FALSE; 2268 for (i=0; i<mbs; i++) jl[i] = mbs; 2269 il[0] = 0; 2270 2271 for (k = 0; k<mbs; k++){ 2272 /* zero rtmp */ 2273 nz = bi[k+1] - bi[k]; 2274 bjtmp = bj + bi[k]; 2275 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2276 2277 bval = ba + bi[k]; 2278 /* initialize k-th row by the perm[k]-th row of A */ 2279 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2280 for (j = jmin; j < jmax; j++){ 2281 col = riip[aj[j]]; 2282 if (col >= k){ /* only take upper triangular entry */ 2283 rtmp[col] = aa[j]; 2284 *bval++ = 0.0; /* for in-place factorization */ 2285 } 2286 } 2287 /* shift the diagonal of the matrix */ 2288 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2289 2290 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2291 dk = rtmp[k]; 2292 i = jl[k]; /* first row to be added to k_th row */ 2293 2294 while (i < k){ 2295 nexti = jl[i]; /* next row to be added to k_th row */ 2296 2297 /* compute multiplier, update diag(k) and U(i,k) */ 2298 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2299 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2300 dk += uikdi*ba[ili]; 2301 ba[ili] = uikdi; /* -U(i,k) */ 2302 2303 /* add multiple of row i to k-th row */ 2304 jmin = ili + 1; jmax = bi[i+1]; 2305 if (jmin < jmax){ 2306 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2307 /* update il and jl for row i */ 2308 il[i] = jmin; 2309 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2310 } 2311 i = nexti; 2312 } 2313 2314 /* shift the diagonals when zero pivot is detected */ 2315 /* compute rs=sum of abs(off-diagonal) */ 2316 rs = 0.0; 2317 jmin = bi[k]+1; 2318 nz = bi[k+1] - jmin; 2319 bcol = bj + jmin; 2320 for (j=0; j<nz; j++) { 2321 rs += PetscAbsScalar(rtmp[bcol[j]]); 2322 } 2323 2324 sctx.rs = rs; 2325 sctx.pv = dk; 2326 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 2327 2328 if (newshift == 1) { 2329 if (!sctx.shift_amount) { 2330 sctx.shift_amount = 1e-5; 2331 } 2332 break; 2333 } 2334 2335 /* copy data into U(k,:) */ 2336 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2337 jmin = bi[k]+1; jmax = bi[k+1]; 2338 if (jmin < jmax) { 2339 for (j=jmin; j<jmax; j++){ 2340 col = bj[j]; ba[j] = rtmp[col]; 2341 } 2342 /* add the k-th row into il and jl */ 2343 il[k] = jmin; 2344 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2345 } 2346 } 2347 } while (sctx.chshift); 2348 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2349 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2350 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2351 2352 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2353 if (perm_identity){ 2354 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2355 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2356 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2357 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2358 } else { 2359 B->ops->solve = MatSolve_SeqSBAIJ_1; 2360 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2361 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2362 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2363 } 2364 2365 C->assembled = PETSC_TRUE; 2366 C->preallocated = PETSC_TRUE; 2367 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2368 if (sctx.nshift){ 2369 if (shiftnz) { 2370 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2371 } else if (shiftpd) { 2372 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2373 } 2374 } 2375 PetscFunctionReturn(0); 2376 } 2377 2378 /* 2379 icc() under revised new data structure. 2380 Factored arrays bj and ba are stored as 2381 U(0,:),...,U(i,:),U(n-1,:) 2382 2383 ui=fact->i is an array of size n+1, in which 2384 ui+ 2385 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2386 ui[n]: points to U(n-1,n-1)+1 2387 2388 udiag=fact->diag is an array of size n,in which 2389 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2390 2391 U(i,:) contains udiag[i] as its last entry, i.e., 2392 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2393 */ 2394 2395 #undef __FUNCT__ 2396 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_newdatastruct" 2397 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2398 { 2399 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2400 Mat_SeqSBAIJ *b; 2401 PetscErrorCode ierr; 2402 PetscTruth perm_identity,missing; 2403 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2404 const PetscInt *rip,*riip; 2405 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2406 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2407 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2408 PetscReal fill=info->fill,levels=info->levels; 2409 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2410 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2411 PetscBT lnkbt; 2412 IS iperm; 2413 2414 PetscFunctionBegin; 2415 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2416 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2417 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2418 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2419 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2420 2421 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2422 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2423 ui[0] = 0; 2424 2425 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2426 if (!levels && perm_identity) { 2427 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2428 cols = uj; 2429 for (i=0; i<am; i++) { 2430 ncols = ai[i+1] - a->diag[i]; 2431 ui[i+1] = ui[i] + ncols; 2432 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2433 2434 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2435 ncols--; /* exclude diagonal */ 2436 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2437 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2438 } 2439 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2440 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2441 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2442 2443 /* initialization */ 2444 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2445 2446 /* jl: linked list for storing indices of the pivot rows 2447 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2448 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2449 for (i=0; i<am; i++){ 2450 jl[i] = am; il[i] = 0; 2451 } 2452 2453 /* create and initialize a linked list for storing column indices of the active row k */ 2454 nlnk = am + 1; 2455 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2456 2457 /* initial FreeSpace size is fill*(ai[am]+1) */ 2458 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2459 current_space = free_space; 2460 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2461 current_space_lvl = free_space_lvl; 2462 2463 for (k=0; k<am; k++){ /* for each active row k */ 2464 /* initialize lnk by the column indices of row rip[k] of A */ 2465 nzk = 0; 2466 ncols = ai[rip[k]+1] - ai[rip[k]]; 2467 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2468 ncols_upper = 0; 2469 for (j=0; j<ncols; j++){ 2470 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2471 if (riip[i] >= k){ /* only take upper triangular entry */ 2472 ajtmp[ncols_upper] = i; 2473 ncols_upper++; 2474 } 2475 } 2476 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2477 nzk += nlnk; 2478 2479 /* update lnk by computing fill-in for each pivot row to be merged in */ 2480 prow = jl[k]; /* 1st pivot row */ 2481 2482 while (prow < k){ 2483 nextprow = jl[prow]; 2484 2485 /* merge prow into k-th row */ 2486 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2487 jmax = ui[prow+1]; 2488 ncols = jmax-jmin; 2489 i = jmin - ui[prow]; 2490 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2491 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2492 j = *(uj - 1); 2493 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2494 nzk += nlnk; 2495 2496 /* update il and jl for prow */ 2497 if (jmin < jmax){ 2498 il[prow] = jmin; 2499 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2500 } 2501 prow = nextprow; 2502 } 2503 2504 /* if free space is not available, make more free space */ 2505 if (current_space->local_remaining<nzk) { 2506 i = am - k + 1; /* num of unfactored rows */ 2507 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2508 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2509 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2510 reallocs++; 2511 } 2512 2513 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2514 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2515 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2516 2517 /* add the k-th row into il and jl */ 2518 if (nzk > 1){ 2519 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2520 jl[k] = jl[i]; jl[i] = k; 2521 il[k] = ui[k] + 1; 2522 } 2523 uj_ptr[k] = current_space->array; 2524 uj_lvl_ptr[k] = current_space_lvl->array; 2525 2526 current_space->array += nzk; 2527 current_space->local_used += nzk; 2528 current_space->local_remaining -= nzk; 2529 2530 current_space_lvl->array += nzk; 2531 current_space_lvl->local_used += nzk; 2532 current_space_lvl->local_remaining -= nzk; 2533 2534 ui[k+1] = ui[k] + nzk; 2535 } 2536 2537 #if defined(PETSC_USE_INFO) 2538 if (ai[am] != 0) { 2539 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2540 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2541 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2542 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2543 } else { 2544 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2545 } 2546 #endif 2547 2548 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2549 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2550 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2551 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2552 2553 /* destroy list of free space and other temporary array(s) */ 2554 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2555 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor in newdatastruct */ 2556 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2557 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2558 2559 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2560 2561 /* put together the new matrix in MATSEQSBAIJ format */ 2562 2563 b = (Mat_SeqSBAIJ*)(fact)->data; 2564 b->singlemalloc = PETSC_FALSE; 2565 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2566 b->j = uj; 2567 b->i = ui; 2568 b->diag = udiag; 2569 b->free_diag = PETSC_TRUE; 2570 b->ilen = 0; 2571 b->imax = 0; 2572 b->row = perm; 2573 b->col = perm; 2574 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2575 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2576 b->icol = iperm; 2577 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2578 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2579 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2580 b->maxnz = b->nz = ui[am]; 2581 b->free_a = PETSC_TRUE; 2582 b->free_ij = PETSC_TRUE; 2583 2584 fact->info.factor_mallocs = reallocs; 2585 fact->info.fill_ratio_given = fill; 2586 if (ai[am] != 0) { 2587 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2588 } else { 2589 fact->info.fill_ratio_needed = 0.0; 2590 } 2591 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_newdatastruct; 2592 PetscFunctionReturn(0); 2593 } 2594 2595 #undef __FUNCT__ 2596 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2597 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2598 { 2599 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2600 Mat_SeqSBAIJ *b; 2601 PetscErrorCode ierr; 2602 PetscTruth perm_identity,missing; 2603 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2604 const PetscInt *rip,*riip; 2605 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2606 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2607 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2608 PetscReal fill=info->fill,levels=info->levels; 2609 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2610 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2611 PetscBT lnkbt; 2612 IS iperm; 2613 PetscTruth newdatastruct=PETSC_FALSE; 2614 2615 PetscFunctionBegin; 2616 ierr = PetscOptionsGetTruth(PETSC_NULL,"-icc_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 2617 if(newdatastruct){ 2618 ierr = MatICCFactorSymbolic_SeqAIJ_newdatastruct(fact,A,perm,info);CHKERRQ(ierr); 2619 PetscFunctionReturn(0); 2620 } 2621 2622 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2623 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2624 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2625 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2626 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2627 2628 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2629 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2630 ui[0] = 0; 2631 2632 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2633 if (!levels && perm_identity) { 2634 2635 for (i=0; i<am; i++) { 2636 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2637 udiag[i] = ui[i]; 2638 } 2639 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2640 cols = uj; 2641 for (i=0; i<am; i++) { 2642 aj = a->j + a->diag[i]; 2643 ncols = ui[i+1] - ui[i]; 2644 for (j=0; j<ncols; j++) *cols++ = *aj++; 2645 } 2646 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2647 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2648 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2649 2650 /* initialization */ 2651 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2652 2653 /* jl: linked list for storing indices of the pivot rows 2654 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2655 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2656 for (i=0; i<am; i++){ 2657 jl[i] = am; il[i] = 0; 2658 } 2659 2660 /* create and initialize a linked list for storing column indices of the active row k */ 2661 nlnk = am + 1; 2662 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2663 2664 /* initial FreeSpace size is fill*(ai[am]+1) */ 2665 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2666 current_space = free_space; 2667 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2668 current_space_lvl = free_space_lvl; 2669 2670 for (k=0; k<am; k++){ /* for each active row k */ 2671 /* initialize lnk by the column indices of row rip[k] of A */ 2672 nzk = 0; 2673 ncols = ai[rip[k]+1] - ai[rip[k]]; 2674 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2675 ncols_upper = 0; 2676 for (j=0; j<ncols; j++){ 2677 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2678 if (riip[i] >= k){ /* only take upper triangular entry */ 2679 ajtmp[ncols_upper] = i; 2680 ncols_upper++; 2681 } 2682 } 2683 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2684 nzk += nlnk; 2685 2686 /* update lnk by computing fill-in for each pivot row to be merged in */ 2687 prow = jl[k]; /* 1st pivot row */ 2688 2689 while (prow < k){ 2690 nextprow = jl[prow]; 2691 2692 /* merge prow into k-th row */ 2693 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2694 jmax = ui[prow+1]; 2695 ncols = jmax-jmin; 2696 i = jmin - ui[prow]; 2697 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2698 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2699 j = *(uj - 1); 2700 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2701 nzk += nlnk; 2702 2703 /* update il and jl for prow */ 2704 if (jmin < jmax){ 2705 il[prow] = jmin; 2706 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2707 } 2708 prow = nextprow; 2709 } 2710 2711 /* if free space is not available, make more free space */ 2712 if (current_space->local_remaining<nzk) { 2713 i = am - k + 1; /* num of unfactored rows */ 2714 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2715 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2716 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2717 reallocs++; 2718 } 2719 2720 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2721 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2722 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2723 2724 /* add the k-th row into il and jl */ 2725 if (nzk > 1){ 2726 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2727 jl[k] = jl[i]; jl[i] = k; 2728 il[k] = ui[k] + 1; 2729 } 2730 uj_ptr[k] = current_space->array; 2731 uj_lvl_ptr[k] = current_space_lvl->array; 2732 2733 current_space->array += nzk; 2734 current_space->local_used += nzk; 2735 current_space->local_remaining -= nzk; 2736 2737 current_space_lvl->array += nzk; 2738 current_space_lvl->local_used += nzk; 2739 current_space_lvl->local_remaining -= nzk; 2740 2741 ui[k+1] = ui[k] + nzk; 2742 } 2743 2744 #if defined(PETSC_USE_INFO) 2745 if (ai[am] != 0) { 2746 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2747 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2748 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2749 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2750 } else { 2751 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2752 } 2753 #endif 2754 2755 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2756 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2757 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2758 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2759 2760 /* destroy list of free space and other temporary array(s) */ 2761 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2762 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2763 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2764 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2765 2766 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2767 2768 /* put together the new matrix in MATSEQSBAIJ format */ 2769 2770 b = (Mat_SeqSBAIJ*)fact->data; 2771 b->singlemalloc = PETSC_FALSE; 2772 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2773 b->j = uj; 2774 b->i = ui; 2775 b->diag = udiag; 2776 b->free_diag = PETSC_TRUE; 2777 b->ilen = 0; 2778 b->imax = 0; 2779 b->row = perm; 2780 b->col = perm; 2781 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2782 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2783 b->icol = iperm; 2784 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2785 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2786 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2787 b->maxnz = b->nz = ui[am]; 2788 b->free_a = PETSC_TRUE; 2789 b->free_ij = PETSC_TRUE; 2790 2791 fact->info.factor_mallocs = reallocs; 2792 fact->info.fill_ratio_given = fill; 2793 if (ai[am] != 0) { 2794 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2795 } else { 2796 fact->info.fill_ratio_needed = 0.0; 2797 } 2798 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2803 { 2804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2805 Mat_SeqSBAIJ *b; 2806 PetscErrorCode ierr; 2807 PetscTruth perm_identity; 2808 PetscReal fill = info->fill; 2809 const PetscInt *rip,*riip; 2810 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2811 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2812 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2813 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2814 PetscBT lnkbt; 2815 IS iperm; 2816 2817 PetscFunctionBegin; 2818 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2819 /* check whether perm is the identity mapping */ 2820 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2821 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2822 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2823 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2824 2825 /* initialization */ 2826 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2827 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2828 ui[0] = 0; 2829 2830 /* jl: linked list for storing indices of the pivot rows 2831 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2832 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2833 for (i=0; i<am; i++){ 2834 jl[i] = am; il[i] = 0; 2835 } 2836 2837 /* create and initialize a linked list for storing column indices of the active row k */ 2838 nlnk = am + 1; 2839 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2840 2841 /* initial FreeSpace size is fill*(ai[am]+1) */ 2842 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2843 current_space = free_space; 2844 2845 for (k=0; k<am; k++){ /* for each active row k */ 2846 /* initialize lnk by the column indices of row rip[k] of A */ 2847 nzk = 0; 2848 ncols = ai[rip[k]+1] - ai[rip[k]]; 2849 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2850 ncols_upper = 0; 2851 for (j=0; j<ncols; j++){ 2852 i = riip[*(aj + ai[rip[k]] + j)]; 2853 if (i >= k){ /* only take upper triangular entry */ 2854 cols[ncols_upper] = i; 2855 ncols_upper++; 2856 } 2857 } 2858 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2859 nzk += nlnk; 2860 2861 /* update lnk by computing fill-in for each pivot row to be merged in */ 2862 prow = jl[k]; /* 1st pivot row */ 2863 2864 while (prow < k){ 2865 nextprow = jl[prow]; 2866 /* merge prow into k-th row */ 2867 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2868 jmax = ui[prow+1]; 2869 ncols = jmax-jmin; 2870 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2871 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2872 nzk += nlnk; 2873 2874 /* update il and jl for prow */ 2875 if (jmin < jmax){ 2876 il[prow] = jmin; 2877 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2878 } 2879 prow = nextprow; 2880 } 2881 2882 /* if free space is not available, make more free space */ 2883 if (current_space->local_remaining<nzk) { 2884 i = am - k + 1; /* num of unfactored rows */ 2885 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2886 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2887 reallocs++; 2888 } 2889 2890 /* copy data into free space, then initialize lnk */ 2891 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2892 2893 /* add the k-th row into il and jl */ 2894 if (nzk-1 > 0){ 2895 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2896 jl[k] = jl[i]; jl[i] = k; 2897 il[k] = ui[k] + 1; 2898 } 2899 ui_ptr[k] = current_space->array; 2900 current_space->array += nzk; 2901 current_space->local_used += nzk; 2902 current_space->local_remaining -= nzk; 2903 2904 ui[k+1] = ui[k] + nzk; 2905 } 2906 2907 #if defined(PETSC_USE_INFO) 2908 if (ai[am] != 0) { 2909 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 2910 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2911 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2912 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2913 } else { 2914 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2915 } 2916 #endif 2917 2918 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2919 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2920 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2921 2922 /* destroy list of free space and other temporary array(s) */ 2923 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2924 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor in newdatastruct */ 2925 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2926 2927 /* put together the new matrix in MATSEQSBAIJ format */ 2928 2929 b = (Mat_SeqSBAIJ*)fact->data; 2930 b->singlemalloc = PETSC_FALSE; 2931 b->free_a = PETSC_TRUE; 2932 b->free_ij = PETSC_TRUE; 2933 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2934 b->j = uj; 2935 b->i = ui; 2936 b->diag = udiag; 2937 b->free_diag = PETSC_TRUE; 2938 b->ilen = 0; 2939 b->imax = 0; 2940 b->row = perm; 2941 b->col = perm; 2942 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2943 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2944 b->icol = iperm; 2945 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2946 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2947 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2948 b->maxnz = b->nz = ui[am]; 2949 2950 fact->info.factor_mallocs = reallocs; 2951 fact->info.fill_ratio_given = fill; 2952 if (ai[am] != 0) { 2953 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2954 } else { 2955 fact->info.fill_ratio_needed = 0.0; 2956 } 2957 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_newdatastruct; 2958 PetscFunctionReturn(0); 2959 } 2960 2961 #undef __FUNCT__ 2962 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2963 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2964 { 2965 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2966 Mat_SeqSBAIJ *b; 2967 PetscErrorCode ierr; 2968 PetscTruth perm_identity; 2969 PetscReal fill = info->fill; 2970 const PetscInt *rip,*riip; 2971 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2972 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2973 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2974 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2975 PetscBT lnkbt; 2976 IS iperm; 2977 PetscTruth newdatastruct=PETSC_FALSE; 2978 2979 PetscFunctionBegin; 2980 ierr = PetscOptionsGetTruth(PETSC_NULL,"-cholesky_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 2981 if(newdatastruct){ 2982 ierr = MatCholeskyFactorSymbolic_SeqAIJ_newdatastruct(fact,A,perm,info);CHKERRQ(ierr); 2983 PetscFunctionReturn(0); 2984 } 2985 2986 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2987 /* check whether perm is the identity mapping */ 2988 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2989 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2990 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2991 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2992 2993 /* initialization */ 2994 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2995 ui[0] = 0; 2996 2997 /* jl: linked list for storing indices of the pivot rows 2998 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2999 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3000 for (i=0; i<am; i++){ 3001 jl[i] = am; il[i] = 0; 3002 } 3003 3004 /* create and initialize a linked list for storing column indices of the active row k */ 3005 nlnk = am + 1; 3006 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3007 3008 /* initial FreeSpace size is fill*(ai[am]+1) */ 3009 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3010 current_space = free_space; 3011 3012 for (k=0; k<am; k++){ /* for each active row k */ 3013 /* initialize lnk by the column indices of row rip[k] of A */ 3014 nzk = 0; 3015 ncols = ai[rip[k]+1] - ai[rip[k]]; 3016 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3017 ncols_upper = 0; 3018 for (j=0; j<ncols; j++){ 3019 i = riip[*(aj + ai[rip[k]] + j)]; 3020 if (i >= k){ /* only take upper triangular entry */ 3021 cols[ncols_upper] = i; 3022 ncols_upper++; 3023 } 3024 } 3025 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3026 nzk += nlnk; 3027 3028 /* update lnk by computing fill-in for each pivot row to be merged in */ 3029 prow = jl[k]; /* 1st pivot row */ 3030 3031 while (prow < k){ 3032 nextprow = jl[prow]; 3033 /* merge prow into k-th row */ 3034 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3035 jmax = ui[prow+1]; 3036 ncols = jmax-jmin; 3037 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3038 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3039 nzk += nlnk; 3040 3041 /* update il and jl for prow */ 3042 if (jmin < jmax){ 3043 il[prow] = jmin; 3044 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3045 } 3046 prow = nextprow; 3047 } 3048 3049 /* if free space is not available, make more free space */ 3050 if (current_space->local_remaining<nzk) { 3051 i = am - k + 1; /* num of unfactored rows */ 3052 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3053 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3054 reallocs++; 3055 } 3056 3057 /* copy data into free space, then initialize lnk */ 3058 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3059 3060 /* add the k-th row into il and jl */ 3061 if (nzk-1 > 0){ 3062 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3063 jl[k] = jl[i]; jl[i] = k; 3064 il[k] = ui[k] + 1; 3065 } 3066 ui_ptr[k] = current_space->array; 3067 current_space->array += nzk; 3068 current_space->local_used += nzk; 3069 current_space->local_remaining -= nzk; 3070 3071 ui[k+1] = ui[k] + nzk; 3072 } 3073 3074 #if defined(PETSC_USE_INFO) 3075 if (ai[am] != 0) { 3076 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3077 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3078 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3079 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3080 } else { 3081 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3082 } 3083 #endif 3084 3085 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3086 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3087 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3088 3089 /* destroy list of free space and other temporary array(s) */ 3090 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3091 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3092 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3093 3094 /* put together the new matrix in MATSEQSBAIJ format */ 3095 3096 b = (Mat_SeqSBAIJ*)fact->data; 3097 b->singlemalloc = PETSC_FALSE; 3098 b->free_a = PETSC_TRUE; 3099 b->free_ij = PETSC_TRUE; 3100 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3101 b->j = uj; 3102 b->i = ui; 3103 b->diag = 0; 3104 b->ilen = 0; 3105 b->imax = 0; 3106 b->row = perm; 3107 b->col = perm; 3108 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3109 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3110 b->icol = iperm; 3111 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3112 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3113 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3114 b->maxnz = b->nz = ui[am]; 3115 3116 fact->info.factor_mallocs = reallocs; 3117 fact->info.fill_ratio_given = fill; 3118 if (ai[am] != 0) { 3119 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3120 } else { 3121 fact->info.fill_ratio_needed = 0.0; 3122 } 3123 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3124 PetscFunctionReturn(0); 3125 } 3126 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct" 3129 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 3130 { 3131 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3132 PetscErrorCode ierr; 3133 PetscInt n = A->rmap->n; 3134 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3135 PetscScalar *x,sum; 3136 const PetscScalar *b; 3137 const MatScalar *aa = a->a,*v; 3138 PetscInt i,nz; 3139 3140 PetscFunctionBegin; 3141 if (!n) PetscFunctionReturn(0); 3142 3143 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3144 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3145 3146 /* forward solve the lower triangular */ 3147 x[0] = b[0]; 3148 v = aa; 3149 vi = aj; 3150 for (i=1; i<n; i++) { 3151 nz = ai[i+1] - ai[i]; 3152 sum = b[i]; 3153 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3154 v += nz; 3155 vi += nz; 3156 x[i] = sum; 3157 } 3158 3159 /* backward solve the upper triangular */ 3160 for (i=n-1; i>=0; i--){ 3161 v = aa + adiag[i+1] + 1; 3162 vi = aj + adiag[i+1] + 1; 3163 nz = adiag[i] - adiag[i+1]-1; 3164 sum = x[i]; 3165 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3166 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3167 } 3168 3169 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3170 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3171 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatSolve_SeqAIJ_newdatastruct" 3177 PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx) 3178 { 3179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3180 IS iscol = a->col,isrow = a->row; 3181 PetscErrorCode ierr; 3182 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3183 const PetscInt *rout,*cout,*r,*c; 3184 PetscScalar *x,*tmp,sum; 3185 const PetscScalar *b; 3186 const MatScalar *aa = a->a,*v; 3187 3188 PetscFunctionBegin; 3189 if (!n) PetscFunctionReturn(0); 3190 3191 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3193 tmp = a->solve_work; 3194 3195 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3196 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3197 3198 /* forward solve the lower triangular */ 3199 tmp[0] = b[r[0]]; 3200 v = aa; 3201 vi = aj; 3202 for (i=1; i<n; i++) { 3203 nz = ai[i+1] - ai[i]; 3204 sum = b[r[i]]; 3205 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3206 tmp[i] = sum; 3207 v += nz; vi += nz; 3208 } 3209 3210 /* backward solve the upper triangular */ 3211 for (i=n-1; i>=0; i--){ 3212 v = aa + adiag[i+1]+1; 3213 vi = aj + adiag[i+1]+1; 3214 nz = adiag[i]-adiag[i+1]-1; 3215 sum = tmp[i]; 3216 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3217 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3218 } 3219 3220 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3221 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3222 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3224 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3230 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3231 { 3232 Mat B = *fact; 3233 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3234 IS isicol; 3235 PetscErrorCode ierr; 3236 const PetscInt *r,*ic; 3237 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3238 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3239 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3240 PetscInt nlnk,*lnk; 3241 PetscBT lnkbt; 3242 PetscTruth row_identity,icol_identity,both_identity; 3243 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3244 const PetscInt *ics; 3245 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3246 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftinblocks; 3247 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3248 PetscTruth missing; 3249 3250 PetscFunctionBegin; 3251 3252 if (dt == PETSC_DEFAULT) dt = 0.005; 3253 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3254 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3255 3256 /* ------- symbolic factorization, can be reused ---------*/ 3257 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3258 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3259 adiag=a->diag; 3260 3261 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3262 3263 /* bdiag is location of diagonal in factor */ 3264 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3265 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3266 3267 /* allocate row pointers bi */ 3268 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3269 3270 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3271 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3272 nnz_max = ai[n]+2*n*dtcount+2; 3273 3274 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3275 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3276 3277 /* put together the new matrix */ 3278 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3280 b = (Mat_SeqAIJ*)B->data; 3281 b->free_a = PETSC_TRUE; 3282 b->free_ij = PETSC_TRUE; 3283 b->singlemalloc = PETSC_FALSE; 3284 b->a = ba; 3285 b->j = bj; 3286 b->i = bi; 3287 b->diag = bdiag; 3288 b->ilen = 0; 3289 b->imax = 0; 3290 b->row = isrow; 3291 b->col = iscol; 3292 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3293 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3294 b->icol = isicol; 3295 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3296 3297 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3298 b->maxnz = nnz_max; 3299 3300 B->factor = MAT_FACTOR_ILUDT; 3301 B->info.factor_mallocs = 0; 3302 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3303 CHKMEMQ; 3304 /* ------- end of symbolic factorization ---------*/ 3305 3306 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3307 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3308 ics = ic; 3309 3310 /* linked list for storing column indices of the active row */ 3311 nlnk = n + 1; 3312 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3313 3314 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3315 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3316 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3317 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3318 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3319 3320 bi[0] = 0; 3321 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3322 bdiag_rev[n] = bdiag[0]; 3323 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3324 for (i=0; i<n; i++) { 3325 /* copy initial fill into linked list */ 3326 nzi = 0; /* nonzeros for active row i */ 3327 nzi = ai[r[i]+1] - ai[r[i]]; 3328 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3329 nzi_al = adiag[r[i]] - ai[r[i]]; 3330 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3331 ajtmp = aj + ai[r[i]]; 3332 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3333 3334 /* load in initial (unfactored row) */ 3335 aatmp = a->a + ai[r[i]]; 3336 for (j=0; j<nzi; j++) { 3337 rtmp[ics[*ajtmp++]] = *aatmp++; 3338 } 3339 3340 /* add pivot rows into linked list */ 3341 row = lnk[n]; 3342 while (row < i ) { 3343 nzi_bl = bi[row+1] - bi[row] + 1; 3344 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3345 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3346 nzi += nlnk; 3347 row = lnk[row]; 3348 } 3349 3350 /* copy data from lnk into jtmp, then initialize lnk */ 3351 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3352 3353 /* numerical factorization */ 3354 bjtmp = jtmp; 3355 row = *bjtmp++; /* 1st pivot row */ 3356 while ( row < i ) { 3357 pc = rtmp + row; 3358 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3359 multiplier = (*pc) * (*pv); 3360 *pc = multiplier; 3361 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3362 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3363 pv = ba + bdiag[row+1] + 1; 3364 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3365 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3366 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3367 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3368 } 3369 row = *bjtmp++; 3370 } 3371 3372 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3373 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3374 nzi_bl = 0; j = 0; 3375 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3376 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3377 nzi_bl++; j++; 3378 } 3379 nzi_bu = nzi - nzi_bl -1; 3380 while (j < nzi){ 3381 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3382 j++; 3383 } 3384 3385 bjtmp = bj + bi[i]; 3386 batmp = ba + bi[i]; 3387 /* apply level dropping rule to L part */ 3388 ncut = nzi_al + dtcount; 3389 if (ncut < nzi_bl){ 3390 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3391 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3392 } else { 3393 ncut = nzi_bl; 3394 } 3395 for (j=0; j<ncut; j++){ 3396 bjtmp[j] = jtmp[j]; 3397 batmp[j] = vtmp[j]; 3398 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3399 } 3400 bi[i+1] = bi[i] + ncut; 3401 nzi = ncut + 1; 3402 3403 /* apply level dropping rule to U part */ 3404 ncut = nzi_au + dtcount; 3405 if (ncut < nzi_bu){ 3406 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3407 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3408 } else { 3409 ncut = nzi_bu; 3410 } 3411 nzi += ncut; 3412 3413 /* mark bdiagonal */ 3414 bdiag[i+1] = bdiag[i] - (ncut + 1); 3415 bdiag_rev[n-i-1] = bdiag[i+1]; 3416 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3417 bjtmp = bj + bdiag[i]; 3418 batmp = ba + bdiag[i]; 3419 *bjtmp = i; 3420 *batmp = diag_tmp; /* rtmp[i]; */ 3421 if (*batmp == 0.0) { 3422 *batmp = dt+shift; 3423 /* printf(" row %d add shift %g\n",i,shift); */ 3424 } 3425 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3426 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3427 3428 bjtmp = bj + bdiag[i+1]+1; 3429 batmp = ba + bdiag[i+1]+1; 3430 for (k=0; k<ncut; k++){ 3431 bjtmp[k] = jtmp[nzi_bl+1+k]; 3432 batmp[k] = vtmp[nzi_bl+1+k]; 3433 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3434 } 3435 /* printf("\n"); */ 3436 3437 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3438 /* 3439 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3440 printf(" ----------------------------\n"); 3441 */ 3442 } /* for (i=0; i<n; i++) */ 3443 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3444 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3445 3446 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3447 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3448 3449 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3450 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3451 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3452 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3453 3454 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3455 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3456 3457 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3458 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3459 both_identity = (PetscTruth) (row_identity && icol_identity); 3460 if (row_identity && icol_identity) { 3461 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 3462 } else { 3463 B->ops->solve = MatSolve_SeqAIJ_newdatastruct; 3464 } 3465 3466 B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 3467 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 3468 B->ops->solveadd = 0; 3469 B->ops->solvetranspose = 0; 3470 B->ops->solvetransposeadd = 0; 3471 B->ops->matsolve = 0; 3472 B->assembled = PETSC_TRUE; 3473 B->preallocated = PETSC_TRUE; 3474 PetscFunctionReturn(0); 3475 } 3476 3477 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3480 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3481 { 3482 PetscErrorCode ierr; 3483 3484 PetscFunctionBegin; 3485 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3486 3487 fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 3488 PetscFunctionReturn(0); 3489 } 3490 3491 /* 3492 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3493 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3494 */ 3495 #undef __FUNCT__ 3496 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3497 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3498 { 3499 Mat C=fact; 3500 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3501 IS isrow = b->row,isicol = b->icol; 3502 PetscErrorCode ierr; 3503 const PetscInt *r,*ic,*ics; 3504 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3505 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3506 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3507 PetscReal dt=info->dt,shift=info->shiftinblocks; 3508 PetscTruth row_identity, col_identity; 3509 3510 PetscFunctionBegin; 3511 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3512 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3513 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3514 ics = ic; 3515 3516 for (i=0; i<n; i++){ 3517 /* initialize rtmp array */ 3518 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3519 bjtmp = bj + bi[i]; 3520 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3521 rtmp[i] = 0.0; 3522 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3523 bjtmp = bj + bdiag[i+1] + 1; 3524 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3525 3526 /* load in initial unfactored row of A */ 3527 /* printf("row %d\n",i); */ 3528 nz = ai[r[i]+1] - ai[r[i]]; 3529 ajtmp = aj + ai[r[i]]; 3530 v = aa + ai[r[i]]; 3531 for (j=0; j<nz; j++) { 3532 rtmp[ics[*ajtmp++]] = v[j]; 3533 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3534 } 3535 /* printf("\n"); */ 3536 3537 /* numerical factorization */ 3538 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3539 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3540 k = 0; 3541 while (k < nzl){ 3542 row = *bjtmp++; 3543 /* printf(" prow %d\n",row); */ 3544 pc = rtmp + row; 3545 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3546 multiplier = (*pc) * (*pv); 3547 *pc = multiplier; 3548 if (PetscAbsScalar(multiplier) > dt){ 3549 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3550 pv = b->a + bdiag[row+1] + 1; 3551 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3552 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3553 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 3554 } 3555 k++; 3556 } 3557 3558 /* finished row so stick it into b->a */ 3559 /* L-part */ 3560 pv = b->a + bi[i] ; 3561 pj = bj + bi[i] ; 3562 nzl = bi[i+1] - bi[i]; 3563 for (j=0; j<nzl; j++) { 3564 pv[j] = rtmp[pj[j]]; 3565 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3566 } 3567 3568 /* diagonal: invert diagonal entries for simplier triangular solves */ 3569 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3570 b->a[bdiag[i]] = 1.0/rtmp[i]; 3571 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3572 3573 /* U-part */ 3574 pv = b->a + bdiag[i+1] + 1; 3575 pj = bj + bdiag[i+1] + 1; 3576 nzu = bdiag[i] - bdiag[i+1] - 1; 3577 for (j=0; j<nzu; j++) { 3578 pv[j] = rtmp[pj[j]]; 3579 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3580 } 3581 /* printf("\n"); */ 3582 } 3583 3584 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3585 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3586 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3587 3588 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3589 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3590 if (row_identity && col_identity) { 3591 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 3592 } else { 3593 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 3594 } 3595 C->ops->solveadd = 0; 3596 C->ops->solvetranspose = 0; 3597 C->ops->solvetransposeadd = 0; 3598 C->ops->matsolve = 0; 3599 C->assembled = PETSC_TRUE; 3600 C->preallocated = PETSC_TRUE; 3601 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3602 PetscFunctionReturn(0); 3603 } 3604