1 #define PETSCMAT_DLL 2 3 4 #include "../src/mat/impls/aij/seq/aij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 EXTERN_C_BEGIN 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 12 /* 13 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 14 */ 15 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscTruth *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr); 41 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 42 ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0; 69 past = 0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 122 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 123 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 124 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 125 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 126 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 127 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 128 (*B)->factor = ftype; 129 PetscFunctionReturn(0); 130 } 131 EXTERN_C_END 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 136 { 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 138 IS isicol; 139 PetscErrorCode ierr; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 147 PetscBT lnkbt; 148 PetscTruth newdatastruct=PETSC_FALSE; 149 150 PetscFunctionBegin; 151 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 152 if(newdatastruct){ 153 ierr = MatLUFactorSymbolic_SeqAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 158 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 159 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 160 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 161 162 /* get new row pointers */ 163 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 164 bi[0] = 0; 165 166 /* bdiag is location of diagonal in factor */ 167 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 168 bdiag[0] = 0; 169 170 /* linked list for storing column indices of the active row */ 171 nlnk = n + 1; 172 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 173 174 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 175 176 /* initial FreeSpace size is f*(ai[n]+1) */ 177 f = info->fill; 178 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 179 current_space = free_space; 180 181 for (i=0; i<n; i++) { 182 /* copy previous fill into linked list */ 183 nzi = 0; 184 nnz = ai[r[i]+1] - ai[r[i]]; 185 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 186 ajtmp = aj + ai[r[i]]; 187 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 188 nzi += nlnk; 189 190 /* add pivot rows into linked list */ 191 row = lnk[n]; 192 while (row < i) { 193 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 194 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 195 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 196 nzi += nlnk; 197 row = lnk[row]; 198 } 199 bi[i+1] = bi[i] + nzi; 200 im[i] = nzi; 201 202 /* mark bdiag */ 203 nzbd = 0; 204 nnz = nzi; 205 k = lnk[n]; 206 while (nnz-- && k < i){ 207 nzbd++; 208 k = lnk[k]; 209 } 210 bdiag[i] = bi[i] + nzbd; 211 212 /* if free space is not available, make more free space */ 213 if (current_space->local_remaining<nzi) { 214 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 215 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 216 reallocs++; 217 } 218 219 /* copy data into free space, then initialize lnk */ 220 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 221 bi_ptr[i] = current_space->array; 222 current_space->array += nzi; 223 current_space->local_used += nzi; 224 current_space->local_remaining -= nzi; 225 } 226 #if defined(PETSC_USE_INFO) 227 if (ai[n] != 0) { 228 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 229 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 230 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 231 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 232 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 233 } else { 234 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 235 } 236 #endif 237 238 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 239 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 240 241 /* destroy list of free space and other temporary array(s) */ 242 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 243 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 244 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 245 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 246 247 /* put together the new matrix */ 248 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 249 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 250 b = (Mat_SeqAIJ*)(B)->data; 251 b->free_a = PETSC_TRUE; 252 b->free_ij = PETSC_TRUE; 253 b->singlemalloc = PETSC_FALSE; 254 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 255 b->j = bj; 256 b->i = bi; 257 b->diag = bdiag; 258 b->ilen = 0; 259 b->imax = 0; 260 b->row = isrow; 261 b->col = iscol; 262 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 263 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 264 b->icol = isicol; 265 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 266 267 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 268 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 269 b->maxnz = b->nz = bi[n] ; 270 271 (B)->factor = MAT_FACTOR_LU; 272 (B)->info.factor_mallocs = reallocs; 273 (B)->info.fill_ratio_given = f; 274 275 if (ai[n]) { 276 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 277 } else { 278 (B)->info.fill_ratio_needed = 0.0; 279 } 280 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 281 (B)->ops->solve = MatSolve_SeqAIJ; 282 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 283 /* switch to inodes if appropriate */ 284 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 #undef __FUNCT__ 289 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_newdatastruct" 290 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 291 { 292 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 293 IS isicol; 294 PetscErrorCode ierr; 295 const PetscInt *r,*ic; 296 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 297 PetscInt *bi,*bj,*ajtmp; 298 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 299 PetscReal f; 300 PetscInt nlnk,*lnk,k,**bi_ptr; 301 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 302 PetscBT lnkbt; 303 304 PetscFunctionBegin; 305 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 306 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 307 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 308 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 309 310 /* get new row pointers */ 311 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 312 bi[0] = 0; 313 314 /* bdiag is location of diagonal in factor */ 315 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 316 bdiag[0] = 0; 317 318 /* linked list for storing column indices of the active row */ 319 nlnk = n + 1; 320 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 321 322 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 323 324 /* initial FreeSpace size is f*(ai[n]+1) */ 325 f = info->fill; 326 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 327 current_space = free_space; 328 329 for (i=0; i<n; i++) { 330 /* copy previous fill into linked list */ 331 nzi = 0; 332 nnz = ai[r[i]+1] - ai[r[i]]; 333 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 334 ajtmp = aj + ai[r[i]]; 335 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 336 nzi += nlnk; 337 338 /* add pivot rows into linked list */ 339 row = lnk[n]; 340 while (row < i){ 341 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 342 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 343 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 344 nzi += nlnk; 345 row = lnk[row]; 346 } 347 bi[i+1] = bi[i] + nzi; 348 im[i] = nzi; 349 350 /* mark bdiag */ 351 nzbd = 0; 352 nnz = nzi; 353 k = lnk[n]; 354 while (nnz-- && k < i){ 355 nzbd++; 356 k = lnk[k]; 357 } 358 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 359 360 /* if free space is not available, make more free space */ 361 if (current_space->local_remaining<nzi) { 362 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 363 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 364 reallocs++; 365 } 366 367 /* copy data into free space, then initialize lnk */ 368 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 369 bi_ptr[i] = current_space->array; 370 current_space->array += nzi; 371 current_space->local_used += nzi; 372 current_space->local_remaining -= nzi; 373 } 374 #if defined(PETSC_USE_INFO) 375 if (ai[n] != 0) { 376 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 377 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 378 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 379 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 380 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 381 } else { 382 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 383 } 384 #endif 385 386 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 387 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 388 389 /* destroy list of free space and other temporary array(s) */ 390 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 391 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 392 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 393 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 394 395 /* put together the new matrix */ 396 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 397 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 398 b = (Mat_SeqAIJ*)(B)->data; 399 b->free_a = PETSC_TRUE; 400 b->free_ij = PETSC_TRUE; 401 b->singlemalloc = PETSC_FALSE; 402 ierr = PetscMalloc((bi[2*n+1])*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 403 b->j = bj; 404 b->i = bi; 405 b->diag = bdiag; 406 b->ilen = 0; 407 b->imax = 0; 408 b->row = isrow; 409 b->col = iscol; 410 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 411 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 412 b->icol = isicol; 413 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 414 415 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 416 ierr = PetscLogObjectMemory(B,bi[2*n+1]*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 417 b->maxnz = b->nz = bi[2*n+1] ; 418 419 (B)->factor = MAT_FACTOR_LU; 420 (B)->info.factor_mallocs = reallocs; 421 (B)->info.fill_ratio_given = f; 422 423 if (ai[n]) { 424 (B)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 425 } else { 426 (B)->info.fill_ratio_needed = 0.0; 427 } 428 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 429 PetscFunctionReturn(0); 430 } 431 432 /* 433 Trouble in factorization, should we dump the original matrix? 434 */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatFactorDumpMatrix" 437 PetscErrorCode MatFactorDumpMatrix(Mat A) 438 { 439 PetscErrorCode ierr; 440 PetscTruth flg = PETSC_FALSE; 441 442 PetscFunctionBegin; 443 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 444 if (flg) { 445 PetscViewer viewer; 446 char filename[PETSC_MAX_PATH_LEN]; 447 448 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 449 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 450 ierr = MatView(A,viewer);CHKERRQ(ierr); 451 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 452 } 453 PetscFunctionReturn(0); 454 } 455 456 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 457 458 /* ----------------------------------------------------------- */ 459 extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 460 extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); 461 462 #undef __FUNCT__ 463 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct" 464 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 465 { 466 Mat C=B; 467 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 468 IS isrow = b->row,isicol = b->icol; 469 PetscErrorCode ierr; 470 const PetscInt *r,*ic,*ics; 471 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 472 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 473 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 474 PetscTruth row_identity,col_identity; 475 476 LUShift_Ctx sctx; 477 PetscInt *ddiag,newshift; 478 PetscReal rs; 479 MatScalar d; 480 481 PetscFunctionBegin; 482 /* ZeropivotSetUp(): initialize shift context sctx */ 483 sctx.nshift = 0; 484 sctx.nshift_max = 0; 485 sctx.shift_top = 0.0; 486 sctx.shift_lo = 0.0; 487 sctx.shift_hi = 0.0; 488 sctx.shift_fraction = 0.0; 489 sctx.shift_amount = 0.0; 490 491 /* if both shift schemes are chosen by user, only use info->shiftpd */ 492 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 493 ddiag = a->diag; 494 sctx.shift_top = info->zeropivot; 495 for (i=0; i<n; i++) { 496 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 497 d = (aa)[ddiag[i]]; 498 rs = -PetscAbsScalar(d) - PetscRealPart(d); 499 v = aa+ai[i]; 500 nz = ai[i+1] - ai[i]; 501 for (j=0; j<nz; j++) 502 rs += PetscAbsScalar(v[j]); 503 if (rs>sctx.shift_top) sctx.shift_top = rs; 504 } 505 sctx.shift_top *= 1.1; 506 sctx.nshift_max = 5; 507 sctx.shift_lo = 0.; 508 sctx.shift_hi = 1.; 509 } 510 511 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 512 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 513 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 514 ics = ic; 515 516 do { 517 sctx.lushift = PETSC_FALSE; 518 for (i=0; i<n; i++){ 519 /* zero rtmp */ 520 /* L part */ 521 nz = bi[i+1] - bi[i]; 522 bjtmp = bj + bi[i]; 523 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 524 525 /* U part */ 526 nz = bi[2*n-i+1] - bi[2*n-i]; 527 bjtmp = bj + bi[2*n-i]; 528 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 529 530 /* load in initial (unfactored row) */ 531 nz = ai[r[i]+1] - ai[r[i]]; 532 ajtmp = aj + ai[r[i]]; 533 v = aa + ai[r[i]]; 534 for (j=0; j<nz; j++) { 535 rtmp[ics[ajtmp[j]]] = v[j]; 536 } 537 /* ZeropivotApply() */ 538 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 539 540 /* elimination */ 541 bjtmp = bj + bi[i]; 542 row = *bjtmp++; 543 nzL = bi[i+1] - bi[i]; 544 k = 0; 545 while (k < nzL) { 546 pc = rtmp + row; 547 if (*pc != 0.0) { 548 pv = b->a + bdiag[row]; 549 multiplier = *pc * (*pv); 550 *pc = multiplier; 551 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 552 pv = b->a + bi[2*n-row]; 553 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */ 554 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 555 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 556 } 557 row = *bjtmp++; k++; 558 } 559 560 /* finished row so stick it into b->a */ 561 rs = 0.0; 562 /* L part */ 563 pv = b->a + bi[i] ; 564 pj = b->j + bi[i] ; 565 nz = bi[i+1] - bi[i]; 566 for (j=0; j<nz; j++) { 567 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 568 } 569 570 /* U part */ 571 pv = b->a + bi[2*n-i]; 572 pj = b->j + bi[2*n-i]; 573 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 574 for (j=0; j<nz; j++) { 575 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 576 } 577 578 /* ZeropivotCheck() */ 579 sctx.rs = rs; 580 sctx.pv = rtmp[i]; 581 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 582 if (newshift == 1) break; 583 584 /* Mark diagonal and invert diagonal for simplier triangular solves */ 585 pv = b->a + bdiag[i]; 586 *pv = 1.0/rtmp[i]; 587 588 } /* endof for (i=0; i<n; i++){ */ 589 590 /* ZeropivotRefine() */ 591 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 592 /* 593 * if no shift in this attempt & shifting & started shifting & can refine, 594 * then try lower shift 595 */ 596 sctx.shift_hi = sctx.shift_fraction; 597 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 598 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 599 sctx.lushift = PETSC_TRUE; 600 sctx.nshift++; 601 } 602 } while (sctx.lushift); 603 604 ierr = PetscFree(rtmp);CHKERRQ(ierr); 605 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 606 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 607 608 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 609 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 610 if (row_identity && col_identity) { 611 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 612 } else { 613 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 614 } 615 616 C->ops->solveadd = 0; 617 C->ops->solvetranspose = 0; 618 C->ops->solvetransposeadd = 0; 619 C->ops->matsolve = 0; 620 C->assembled = PETSC_TRUE; 621 C->preallocated = PETSC_TRUE; 622 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 623 624 /* ZeropivotView() */ 625 if (sctx.nshift){ 626 if (info->shiftpd) { 627 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 628 } else if (info->shiftnz) { 629 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 630 } 631 } 632 PetscFunctionReturn(0); 633 } 634 635 /* ----------------------------------------------------------- */ 636 extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat,Vec,Vec); 637 /* extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); */ 638 639 #undef __FUNCT__ 640 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct_v2" 641 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct_v2(Mat B,Mat A,const MatFactorInfo *info) 642 { 643 Mat C=B; 644 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 645 IS isrow = b->row,isicol = b->icol; 646 PetscErrorCode ierr; 647 const PetscInt *r,*ic,*ics; 648 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 649 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 650 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 651 PetscTruth row_identity,col_identity; 652 653 LUShift_Ctx sctx; 654 PetscInt *ddiag,newshift; 655 PetscReal rs; 656 MatScalar d; 657 658 PetscFunctionBegin; 659 /* ZeropivotSetUp(): initialize shift context sctx */ 660 sctx.nshift = 0; 661 sctx.nshift_max = 0; 662 sctx.shift_top = 0.0; 663 sctx.shift_lo = 0.0; 664 sctx.shift_hi = 0.0; 665 sctx.shift_fraction = 0.0; 666 sctx.shift_amount = 0.0; 667 668 /* if both shift schemes are chosen by user, only use info->shiftpd */ 669 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 670 ddiag = a->diag; 671 sctx.shift_top = info->zeropivot; 672 for (i=0; i<n; i++) { 673 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 674 d = (aa)[ddiag[i]]; 675 rs = -PetscAbsScalar(d) - PetscRealPart(d); 676 v = aa+ai[i]; 677 nz = ai[i+1] - ai[i]; 678 for (j=0; j<nz; j++) 679 rs += PetscAbsScalar(v[j]); 680 if (rs>sctx.shift_top) sctx.shift_top = rs; 681 } 682 sctx.shift_top *= 1.1; 683 sctx.nshift_max = 5; 684 sctx.shift_lo = 0.; 685 sctx.shift_hi = 1.; 686 } 687 688 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 689 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 690 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 691 ics = ic; 692 693 do { 694 sctx.lushift = PETSC_FALSE; 695 for (i=0; i<n; i++){ 696 /* zero rtmp */ 697 /* L part */ 698 nz = bi[i+1] - bi[i]; 699 bjtmp = bj + bi[i]; 700 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 701 702 /* U part */ 703 nz = bi[2*n-i+1] - bi[2*n-i]; 704 bjtmp = bj + bi[2*n-i]; 705 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 706 707 /* load in initial (unfactored row) */ 708 nz = ai[r[i]+1] - ai[r[i]]; 709 ajtmp = aj + ai[r[i]]; 710 v = aa + ai[r[i]]; 711 for (j=0; j<nz; j++) { 712 rtmp[ics[ajtmp[j]]] = v[j]; 713 } 714 /* ZeropivotApply() */ 715 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 716 717 /* elimination */ 718 bjtmp = bj + bi[i]; 719 row = *bjtmp++; 720 nzL = bi[i+1] - bi[i]; 721 k = 0; 722 while (k < nzL) { 723 pc = rtmp + row; 724 if (*pc != 0.0) { 725 pv = b->a + bdiag[row]; 726 multiplier = *pc * (*pv); 727 *pc = multiplier; 728 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 729 pv = b->a + bi[2*n-row]; 730 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */ 731 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 732 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 733 } 734 row = *bjtmp++; k++; 735 } 736 737 /* finished row so stick it into b->a */ 738 rs = 0.0; 739 /* L part */ 740 pv = b->a + bi[i] ; 741 pj = b->j + bi[i] ; 742 nz = bi[i+1] - bi[i]; 743 for (j=0; j<nz; j++) { 744 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 745 } 746 747 /* U part */ 748 pv = b->a + bi[2*n-i]; 749 pj = b->j + bi[2*n-i]; 750 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 751 for (j=0; j<nz; j++) { 752 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 753 } 754 755 /* ZeropivotCheck() */ 756 sctx.rs = rs; 757 sctx.pv = rtmp[i]; 758 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 759 if (newshift == 1) break; 760 761 /* Mark diagonal and invert diagonal for simplier triangular solves */ 762 pv = b->a + bdiag[i]; 763 *pv = 1.0/rtmp[i]; 764 765 } /* endof for (i=0; i<n; i++){ */ 766 767 /* ZeropivotRefine() */ 768 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 769 /* 770 * if no shift in this attempt & shifting & started shifting & can refine, 771 * then try lower shift 772 */ 773 sctx.shift_hi = sctx.shift_fraction; 774 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 775 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 776 sctx.lushift = PETSC_TRUE; 777 sctx.nshift++; 778 } 779 } while (sctx.lushift); 780 781 ierr = PetscFree(rtmp);CHKERRQ(ierr); 782 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 783 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 784 785 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 786 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 787 if (row_identity && col_identity) { 788 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 789 } else { 790 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 791 } 792 793 C->ops->solveadd = 0; 794 C->ops->solvetranspose = 0; 795 C->ops->solvetransposeadd = 0; 796 C->ops->matsolve = 0; 797 C->assembled = PETSC_TRUE; 798 C->preallocated = PETSC_TRUE; 799 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 800 801 /* ZeropivotView() */ 802 if (sctx.nshift){ 803 if (info->shiftpd) { 804 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 805 } else if (info->shiftnz) { 806 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 807 } 808 } 809 PetscFunctionReturn(0); 810 } 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 814 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 815 { 816 Mat C=B; 817 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 818 IS isrow = b->row,isicol = b->icol; 819 PetscErrorCode ierr; 820 const PetscInt *r,*ic,*ics; 821 PetscInt nz,row,i,j,n=A->rmap->n,diag; 822 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 823 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 824 MatScalar *pv,*rtmp,*pc,multiplier,d; 825 const MatScalar *v,*aa=a->a; 826 PetscReal rs=0.0; 827 LUShift_Ctx sctx; 828 PetscInt newshift,*ddiag; 829 830 PetscFunctionBegin; 831 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 832 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 833 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 834 ics = ic; 835 836 /* initialize shift context sctx */ 837 sctx.nshift = 0; 838 sctx.nshift_max = 0; 839 sctx.shift_top = 0.0; 840 sctx.shift_lo = 0.0; 841 sctx.shift_hi = 0.0; 842 sctx.shift_fraction = 0.0; 843 sctx.shift_amount = 0.0; 844 845 /* if both shift schemes are chosen by user, only use info->shiftpd */ 846 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 847 ddiag = a->diag; 848 sctx.shift_top = info->zeropivot; 849 for (i=0; i<n; i++) { 850 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 851 d = (aa)[ddiag[i]]; 852 rs = -PetscAbsScalar(d) - PetscRealPart(d); 853 v = aa+ai[i]; 854 nz = ai[i+1] - ai[i]; 855 for (j=0; j<nz; j++) 856 rs += PetscAbsScalar(v[j]); 857 if (rs>sctx.shift_top) sctx.shift_top = rs; 858 } 859 sctx.shift_top *= 1.1; 860 sctx.nshift_max = 5; 861 sctx.shift_lo = 0.; 862 sctx.shift_hi = 1.; 863 } 864 865 do { 866 sctx.lushift = PETSC_FALSE; 867 for (i=0; i<n; i++){ 868 nz = bi[i+1] - bi[i]; 869 bjtmp = bj + bi[i]; 870 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 871 872 /* load in initial (unfactored row) */ 873 nz = ai[r[i]+1] - ai[r[i]]; 874 ajtmp = aj + ai[r[i]]; 875 v = aa + ai[r[i]]; 876 for (j=0; j<nz; j++) { 877 rtmp[ics[ajtmp[j]]] = v[j]; 878 } 879 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 880 /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */ 881 882 row = *bjtmp++; 883 while (row < i) { 884 pc = rtmp + row; 885 if (*pc != 0.0) { 886 pv = b->a + diag_offset[row]; 887 pj = b->j + diag_offset[row] + 1; 888 multiplier = *pc / *pv++; 889 *pc = multiplier; 890 nz = bi[row+1] - diag_offset[row] - 1; 891 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 892 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 893 } 894 row = *bjtmp++; 895 } 896 /* finished row so stick it into b->a */ 897 pv = b->a + bi[i] ; 898 pj = b->j + bi[i] ; 899 nz = bi[i+1] - bi[i]; 900 diag = diag_offset[i] - bi[i]; 901 rs = 0.0; 902 for (j=0; j<nz; j++) { 903 pv[j] = rtmp[pj[j]]; 904 rs += PetscAbsScalar(pv[j]); 905 } 906 rs -= PetscAbsScalar(pv[diag]); 907 908 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 909 sctx.rs = rs; 910 sctx.pv = pv[diag]; 911 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 912 if (newshift == 1) break; 913 } 914 915 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 916 /* 917 * if no shift in this attempt & shifting & started shifting & can refine, 918 * then try lower shift 919 */ 920 sctx.shift_hi = sctx.shift_fraction; 921 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 922 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 923 sctx.lushift = PETSC_TRUE; 924 sctx.nshift++; 925 } 926 } while (sctx.lushift); 927 928 /* invert diagonal entries for simplier triangular solves */ 929 for (i=0; i<n; i++) { 930 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 931 } 932 ierr = PetscFree(rtmp);CHKERRQ(ierr); 933 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 934 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 935 if (b->inode.use) { 936 C->ops->solve = MatSolve_Inode; 937 } else { 938 PetscTruth row_identity, col_identity; 939 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 940 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 941 if (row_identity && col_identity) { 942 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 943 } else { 944 C->ops->solve = MatSolve_SeqAIJ; 945 } 946 } 947 C->ops->solveadd = MatSolveAdd_SeqAIJ; 948 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 949 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 950 C->ops->matsolve = MatMatSolve_SeqAIJ; 951 C->assembled = PETSC_TRUE; 952 C->preallocated = PETSC_TRUE; 953 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 954 if (sctx.nshift){ 955 if (info->shiftpd) { 956 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 957 } else if (info->shiftnz) { 958 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 959 } 960 } 961 PetscFunctionReturn(0); 962 } 963 964 /* 965 This routine implements inplace ILU(0) with row or/and column permutations. 966 Input: 967 A - original matrix 968 Output; 969 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 970 a->j (col index) is permuted by the inverse of colperm, then sorted 971 a->a reordered accordingly with a->j 972 a->diag (ptr to diagonal elements) is updated. 973 */ 974 #undef __FUNCT__ 975 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 976 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 977 { 978 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 979 IS isrow = a->row,isicol = a->icol; 980 PetscErrorCode ierr; 981 const PetscInt *r,*ic,*ics; 982 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 983 PetscInt *ajtmp,nz,row; 984 PetscInt *diag = a->diag,nbdiag,*pj; 985 PetscScalar *rtmp,*pc,multiplier,d; 986 MatScalar *v,*pv; 987 PetscReal rs; 988 LUShift_Ctx sctx; 989 PetscInt newshift; 990 991 PetscFunctionBegin; 992 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 993 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 994 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 995 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 996 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 997 ics = ic; 998 999 sctx.shift_top = 0; 1000 sctx.nshift_max = 0; 1001 sctx.shift_lo = 0; 1002 sctx.shift_hi = 0; 1003 sctx.shift_fraction = 0; 1004 1005 /* if both shift schemes are chosen by user, only use info->shiftpd */ 1006 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 1007 sctx.shift_top = 0; 1008 for (i=0; i<n; i++) { 1009 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 1010 d = (a->a)[diag[i]]; 1011 rs = -PetscAbsScalar(d) - PetscRealPart(d); 1012 v = a->a+ai[i]; 1013 nz = ai[i+1] - ai[i]; 1014 for (j=0; j<nz; j++) 1015 rs += PetscAbsScalar(v[j]); 1016 if (rs>sctx.shift_top) sctx.shift_top = rs; 1017 } 1018 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 1019 sctx.shift_top *= 1.1; 1020 sctx.nshift_max = 5; 1021 sctx.shift_lo = 0.; 1022 sctx.shift_hi = 1.; 1023 } 1024 1025 sctx.shift_amount = 0; 1026 sctx.nshift = 0; 1027 do { 1028 sctx.lushift = PETSC_FALSE; 1029 for (i=0; i<n; i++){ 1030 /* load in initial unfactored row */ 1031 nz = ai[r[i]+1] - ai[r[i]]; 1032 ajtmp = aj + ai[r[i]]; 1033 v = a->a + ai[r[i]]; 1034 /* sort permuted ajtmp and values v accordingly */ 1035 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 1036 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 1037 1038 diag[r[i]] = ai[r[i]]; 1039 for (j=0; j<nz; j++) { 1040 rtmp[ajtmp[j]] = v[j]; 1041 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 1042 } 1043 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 1044 1045 row = *ajtmp++; 1046 while (row < i) { 1047 pc = rtmp + row; 1048 if (*pc != 0.0) { 1049 pv = a->a + diag[r[row]]; 1050 pj = aj + diag[r[row]] + 1; 1051 1052 multiplier = *pc / *pv++; 1053 *pc = multiplier; 1054 nz = ai[r[row]+1] - diag[r[row]] - 1; 1055 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 1056 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 1057 } 1058 row = *ajtmp++; 1059 } 1060 /* finished row so overwrite it onto a->a */ 1061 pv = a->a + ai[r[i]] ; 1062 pj = aj + ai[r[i]] ; 1063 nz = ai[r[i]+1] - ai[r[i]]; 1064 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 1065 1066 rs = 0.0; 1067 for (j=0; j<nz; j++) { 1068 pv[j] = rtmp[pj[j]]; 1069 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 1070 } 1071 1072 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 1073 sctx.rs = rs; 1074 sctx.pv = pv[nbdiag]; 1075 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 1076 if (newshift == 1) break; 1077 } 1078 1079 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 1080 /* 1081 * if no shift in this attempt & shifting & started shifting & can refine, 1082 * then try lower shift 1083 */ 1084 sctx.shift_hi = sctx.shift_fraction; 1085 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 1086 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 1087 sctx.lushift = PETSC_TRUE; 1088 sctx.nshift++; 1089 } 1090 } while (sctx.lushift); 1091 1092 /* invert diagonal entries for simplier triangular solves */ 1093 for (i=0; i<n; i++) { 1094 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 1095 } 1096 1097 ierr = PetscFree(rtmp);CHKERRQ(ierr); 1098 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1099 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1100 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 1101 A->ops->solveadd = MatSolveAdd_SeqAIJ; 1102 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 1103 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 1104 A->assembled = PETSC_TRUE; 1105 A->preallocated = PETSC_TRUE; 1106 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 1107 if (sctx.nshift){ 1108 if (info->shiftpd) { 1109 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 1110 } else if (info->shiftnz) { 1111 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1112 } 1113 } 1114 PetscFunctionReturn(0); 1115 } 1116 1117 /* ----------------------------------------------------------- */ 1118 #undef __FUNCT__ 1119 #define __FUNCT__ "MatLUFactor_SeqAIJ" 1120 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 1121 { 1122 PetscErrorCode ierr; 1123 Mat C; 1124 1125 PetscFunctionBegin; 1126 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 1127 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 1128 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 1129 A->ops->solve = C->ops->solve; 1130 A->ops->solvetranspose = C->ops->solvetranspose; 1131 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 1132 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 /* ----------------------------------------------------------- */ 1136 1137 1138 #undef __FUNCT__ 1139 #define __FUNCT__ "MatSolve_SeqAIJ" 1140 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 1141 { 1142 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1143 IS iscol = a->col,isrow = a->row; 1144 PetscErrorCode ierr; 1145 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1146 PetscInt nz; 1147 const PetscInt *rout,*cout,*r,*c; 1148 PetscScalar *x,*tmp,*tmps,sum; 1149 const PetscScalar *b; 1150 const MatScalar *aa = a->a,*v; 1151 1152 PetscFunctionBegin; 1153 if (!n) PetscFunctionReturn(0); 1154 1155 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1156 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1157 tmp = a->solve_work; 1158 1159 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1160 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1161 1162 /* forward solve the lower triangular */ 1163 tmp[0] = b[*r++]; 1164 tmps = tmp; 1165 for (i=1; i<n; i++) { 1166 v = aa + ai[i] ; 1167 vi = aj + ai[i] ; 1168 nz = a->diag[i] - ai[i]; 1169 sum = b[*r++]; 1170 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1171 tmp[i] = sum; 1172 } 1173 1174 /* backward solve the upper triangular */ 1175 for (i=n-1; i>=0; i--){ 1176 v = aa + a->diag[i] + 1; 1177 vi = aj + a->diag[i] + 1; 1178 nz = ai[i+1] - a->diag[i] - 1; 1179 sum = tmp[i]; 1180 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1181 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1182 } 1183 1184 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1185 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1186 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1187 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1188 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 #undef __FUNCT__ 1193 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1194 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1195 { 1196 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1197 IS iscol = a->col,isrow = a->row; 1198 PetscErrorCode ierr; 1199 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1200 PetscInt nz,neq; 1201 const PetscInt *rout,*cout,*r,*c; 1202 PetscScalar *x,*b,*tmp,*tmps,sum; 1203 const MatScalar *aa = a->a,*v; 1204 PetscTruth bisdense,xisdense; 1205 1206 PetscFunctionBegin; 1207 if (!n) PetscFunctionReturn(0); 1208 1209 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1210 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1211 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1212 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1213 1214 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1215 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1216 1217 tmp = a->solve_work; 1218 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1219 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1220 1221 for (neq=0; neq<B->cmap->n; neq++){ 1222 /* forward solve the lower triangular */ 1223 tmp[0] = b[r[0]]; 1224 tmps = tmp; 1225 for (i=1; i<n; i++) { 1226 v = aa + ai[i] ; 1227 vi = aj + ai[i] ; 1228 nz = a->diag[i] - ai[i]; 1229 sum = b[r[i]]; 1230 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1231 tmp[i] = sum; 1232 } 1233 /* backward solve the upper triangular */ 1234 for (i=n-1; i>=0; i--){ 1235 v = aa + a->diag[i] + 1; 1236 vi = aj + a->diag[i] + 1; 1237 nz = ai[i+1] - a->diag[i] - 1; 1238 sum = tmp[i]; 1239 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1240 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1241 } 1242 1243 b += n; 1244 x += n; 1245 } 1246 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1247 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1248 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1249 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1250 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1251 PetscFunctionReturn(0); 1252 } 1253 1254 #undef __FUNCT__ 1255 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1256 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1257 { 1258 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1259 IS iscol = a->col,isrow = a->row; 1260 PetscErrorCode ierr; 1261 const PetscInt *r,*c,*rout,*cout; 1262 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1263 PetscInt nz,row; 1264 PetscScalar *x,*b,*tmp,*tmps,sum; 1265 const MatScalar *aa = a->a,*v; 1266 1267 PetscFunctionBegin; 1268 if (!n) PetscFunctionReturn(0); 1269 1270 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1271 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1272 tmp = a->solve_work; 1273 1274 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1275 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1276 1277 /* forward solve the lower triangular */ 1278 tmp[0] = b[*r++]; 1279 tmps = tmp; 1280 for (row=1; row<n; row++) { 1281 i = rout[row]; /* permuted row */ 1282 v = aa + ai[i] ; 1283 vi = aj + ai[i] ; 1284 nz = a->diag[i] - ai[i]; 1285 sum = b[*r++]; 1286 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1287 tmp[row] = sum; 1288 } 1289 1290 /* backward solve the upper triangular */ 1291 for (row=n-1; row>=0; row--){ 1292 i = rout[row]; /* permuted row */ 1293 v = aa + a->diag[i] + 1; 1294 vi = aj + a->diag[i] + 1; 1295 nz = ai[i+1] - a->diag[i] - 1; 1296 sum = tmp[row]; 1297 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1298 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1299 } 1300 1301 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1302 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1303 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1304 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1305 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 /* ----------------------------------------------------------- */ 1310 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h" 1311 #undef __FUNCT__ 1312 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 1313 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 1314 { 1315 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1316 PetscErrorCode ierr; 1317 PetscInt n = A->rmap->n; 1318 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1319 PetscScalar *x; 1320 const PetscScalar *b; 1321 const MatScalar *aa = a->a; 1322 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1323 PetscInt adiag_i,i,nz,ai_i; 1324 const PetscInt *vi; 1325 const MatScalar *v; 1326 PetscScalar sum; 1327 #endif 1328 1329 PetscFunctionBegin; 1330 if (!n) PetscFunctionReturn(0); 1331 1332 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1333 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1334 1335 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1336 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1337 #else 1338 /* forward solve the lower triangular */ 1339 x[0] = b[0]; 1340 for (i=1; i<n; i++) { 1341 ai_i = ai[i]; 1342 v = aa + ai_i; 1343 vi = aj + ai_i; 1344 nz = adiag[i] - ai_i; 1345 sum = b[i]; 1346 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1347 x[i] = sum; 1348 } 1349 1350 /* backward solve the upper triangular */ 1351 for (i=n-1; i>=0; i--){ 1352 adiag_i = adiag[i]; 1353 v = aa + adiag_i + 1; 1354 vi = aj + adiag_i + 1; 1355 nz = ai[i+1] - adiag_i - 1; 1356 sum = x[i]; 1357 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1358 x[i] = sum*aa[adiag_i]; 1359 } 1360 #endif 1361 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1362 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1363 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1364 PetscFunctionReturn(0); 1365 } 1366 1367 #undef __FUNCT__ 1368 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1369 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1370 { 1371 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1372 IS iscol = a->col,isrow = a->row; 1373 PetscErrorCode ierr; 1374 PetscInt i, n = A->rmap->n,j; 1375 PetscInt nz; 1376 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1377 PetscScalar *x,*tmp,sum; 1378 const PetscScalar *b; 1379 const MatScalar *aa = a->a,*v; 1380 1381 PetscFunctionBegin; 1382 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1383 1384 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1385 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1386 tmp = a->solve_work; 1387 1388 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1389 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1390 1391 /* forward solve the lower triangular */ 1392 tmp[0] = b[*r++]; 1393 for (i=1; i<n; i++) { 1394 v = aa + ai[i] ; 1395 vi = aj + ai[i] ; 1396 nz = a->diag[i] - ai[i]; 1397 sum = b[*r++]; 1398 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1399 tmp[i] = sum; 1400 } 1401 1402 /* backward solve the upper triangular */ 1403 for (i=n-1; i>=0; i--){ 1404 v = aa + a->diag[i] + 1; 1405 vi = aj + a->diag[i] + 1; 1406 nz = ai[i+1] - a->diag[i] - 1; 1407 sum = tmp[i]; 1408 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1409 tmp[i] = sum*aa[a->diag[i]]; 1410 x[*c--] += tmp[i]; 1411 } 1412 1413 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1414 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1415 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1416 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1417 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1418 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1424 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1425 { 1426 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1427 IS iscol = a->col,isrow = a->row; 1428 PetscErrorCode ierr; 1429 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1430 PetscInt i,n = A->rmap->n,j; 1431 PetscInt nz; 1432 PetscScalar *x,*tmp,s1; 1433 const MatScalar *aa = a->a,*v; 1434 const PetscScalar *b; 1435 1436 PetscFunctionBegin; 1437 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1438 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1439 tmp = a->solve_work; 1440 1441 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1442 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1443 1444 /* copy the b into temp work space according to permutation */ 1445 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1446 1447 /* forward solve the U^T */ 1448 for (i=0; i<n; i++) { 1449 v = aa + diag[i] ; 1450 vi = aj + diag[i] + 1; 1451 nz = ai[i+1] - diag[i] - 1; 1452 s1 = tmp[i]; 1453 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1454 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1455 tmp[i] = s1; 1456 } 1457 1458 /* backward solve the L^T */ 1459 for (i=n-1; i>=0; i--){ 1460 v = aa + diag[i] - 1 ; 1461 vi = aj + diag[i] - 1 ; 1462 nz = diag[i] - ai[i]; 1463 s1 = tmp[i]; 1464 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1465 } 1466 1467 /* copy tmp into x according to permutation */ 1468 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1469 1470 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1471 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1472 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1473 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1474 1475 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1476 PetscFunctionReturn(0); 1477 } 1478 1479 #undef __FUNCT__ 1480 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1481 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1482 { 1483 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1484 IS iscol = a->col,isrow = a->row; 1485 PetscErrorCode ierr; 1486 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1487 PetscInt i,n = A->rmap->n,j; 1488 PetscInt nz; 1489 PetscScalar *x,*tmp,s1; 1490 const MatScalar *aa = a->a,*v; 1491 const PetscScalar *b; 1492 1493 PetscFunctionBegin; 1494 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1495 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1496 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1497 tmp = a->solve_work; 1498 1499 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1500 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1501 1502 /* copy the b into temp work space according to permutation */ 1503 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1504 1505 /* forward solve the U^T */ 1506 for (i=0; i<n; i++) { 1507 v = aa + diag[i] ; 1508 vi = aj + diag[i] + 1; 1509 nz = ai[i+1] - diag[i] - 1; 1510 s1 = tmp[i]; 1511 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1512 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1513 tmp[i] = s1; 1514 } 1515 1516 /* backward solve the L^T */ 1517 for (i=n-1; i>=0; i--){ 1518 v = aa + diag[i] - 1 ; 1519 vi = aj + diag[i] - 1 ; 1520 nz = diag[i] - ai[i]; 1521 s1 = tmp[i]; 1522 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1523 } 1524 1525 /* copy tmp into x according to permutation */ 1526 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1527 1528 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1529 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1530 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1531 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1532 1533 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 /* ----------------------------------------------------------------*/ 1538 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1539 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth); 1540 1541 /* 1542 ilu(0) with natural ordering under new data structure. 1543 Factored arrays bj and ba are stored as 1544 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1545 1546 bi=fact->i is an array of size 2n+2, in which 1547 bi+ 1548 bi[i] -> 1st entry of L(i,:),i=0,...,i-1 1549 bi[n] -> points to L(n-1,:)+1 1550 bi[n+1] -> 1st entry of U(n-1,:) 1551 bi[2n-i] -> 1st entry of U(i,:) 1552 bi[2n-i+1] -> end of U(i,:)+1, the 1st entry of U(i-1,:) 1553 bi[2n] -> 1st entry of U(0,:) 1554 bi[2n+1] -> points to U(0,:)+1 1555 1556 U(i,:) contains diag[i] as its last entry, i.e., 1557 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1558 */ 1559 #undef __FUNCT__ 1560 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct" 1561 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1562 { 1563 1564 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1565 PetscErrorCode ierr; 1566 PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1567 PetscInt i,j,nz,*bi,*bj,*bdiag; 1568 1569 PetscFunctionBegin; 1570 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1571 b = (Mat_SeqAIJ*)(fact)->data; 1572 1573 /* allocate matrix arrays for new data structure */ 1574 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,2*n+2,PetscInt,&b->i);CHKERRQ(ierr); 1575 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(2*n+2)*sizeof(PetscInt));CHKERRQ(ierr); 1576 b->singlemalloc = PETSC_TRUE; 1577 if (!b->diag){ 1578 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1579 } 1580 bdiag = b->diag; 1581 1582 if (n > 0) { 1583 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1584 } 1585 1586 /* set bi and bj with new data structure */ 1587 bi = b->i; 1588 bj = b->j; 1589 1590 /* L part */ 1591 bi[0] = 0; 1592 for (i=0; i<n; i++){ 1593 nz = adiag[i] - ai[i]; 1594 bi[i+1] = bi[i] + nz; 1595 aj = a->j + ai[i]; 1596 for (j=0; j<nz; j++){ 1597 *bj = aj[j]; bj++; 1598 } 1599 } 1600 1601 /* U part */ 1602 bi[n+1] = bi[n]; 1603 for (i=n-1; i>=0; i--){ 1604 nz = ai[i+1] - adiag[i] - 1; 1605 bi[2*n-i+1] = bi[2*n-i] + nz + 1; 1606 aj = a->j + adiag[i] + 1; 1607 for (j=0; j<nz; j++){ 1608 *bj = aj[j]; bj++; 1609 } 1610 /* diag[i] */ 1611 *bj = i; bj++; 1612 bdiag[i] = bi[2*n-i+1]-1; 1613 } 1614 PetscFunctionReturn(0); 1615 } 1616 1617 #undef __FUNCT__ 1618 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct" 1619 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1620 { 1621 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1622 IS isicol; 1623 PetscErrorCode ierr; 1624 const PetscInt *r,*ic; 1625 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1626 PetscInt *bi,*cols,nnz,*cols_lvl; 1627 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1628 PetscInt i,levels,diagonal_fill; 1629 PetscTruth col_identity,row_identity; 1630 PetscReal f; 1631 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1632 PetscBT lnkbt; 1633 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1634 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1635 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1636 PetscTruth missing; 1637 1638 PetscFunctionBegin; 1639 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1640 f = info->fill; 1641 levels = (PetscInt)info->levels; 1642 diagonal_fill = (PetscInt)info->diagonal_fill; 1643 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1644 1645 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1646 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1647 1648 if (!levels && row_identity && col_identity) { 1649 /* special case: ilu(0) with natural ordering */ 1650 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1651 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1652 1653 fact->factor = MAT_FACTOR_ILU; 1654 (fact)->info.factor_mallocs = 0; 1655 (fact)->info.fill_ratio_given = info->fill; 1656 (fact)->info.fill_ratio_needed = 1.0; 1657 b = (Mat_SeqAIJ*)(fact)->data; 1658 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1659 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1660 b->row = isrow; 1661 b->col = iscol; 1662 b->icol = isicol; 1663 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1664 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1665 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1666 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1667 PetscFunctionReturn(0); 1668 } 1669 1670 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1671 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1672 1673 /* get new row pointers */ 1674 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1675 bi[0] = 0; 1676 /* bdiag is location of diagonal in factor */ 1677 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1678 bdiag[0] = 0; 1679 1680 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1681 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1682 1683 /* create a linked list for storing column indices of the active row */ 1684 nlnk = n + 1; 1685 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1686 1687 /* initial FreeSpace size is f*(ai[n]+1) */ 1688 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1689 current_space = free_space; 1690 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1691 current_space_lvl = free_space_lvl; 1692 1693 for (i=0; i<n; i++) { 1694 nzi = 0; 1695 /* copy current row into linked list */ 1696 nnz = ai[r[i]+1] - ai[r[i]]; 1697 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1698 cols = aj + ai[r[i]]; 1699 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1700 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1701 nzi += nlnk; 1702 1703 /* make sure diagonal entry is included */ 1704 if (diagonal_fill && lnk[i] == -1) { 1705 fm = n; 1706 while (lnk[fm] < i) fm = lnk[fm]; 1707 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1708 lnk[fm] = i; 1709 lnk_lvl[i] = 0; 1710 nzi++; dcount++; 1711 } 1712 1713 /* add pivot rows into the active row */ 1714 nzbd = 0; 1715 prow = lnk[n]; 1716 while (prow < i) { 1717 nnz = bdiag[prow]; 1718 cols = bj_ptr[prow] + nnz + 1; 1719 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1720 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1721 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1722 nzi += nlnk; 1723 prow = lnk[prow]; 1724 nzbd++; 1725 } 1726 bdiag[i] = nzbd; 1727 bi[i+1] = bi[i] + nzi; 1728 1729 /* if free space is not available, make more free space */ 1730 if (current_space->local_remaining<nzi) { 1731 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1732 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1733 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1734 reallocs++; 1735 } 1736 1737 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1738 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1739 bj_ptr[i] = current_space->array; 1740 bjlvl_ptr[i] = current_space_lvl->array; 1741 1742 /* make sure the active row i has diagonal entry */ 1743 if (*(bj_ptr[i]+bdiag[i]) != i) { 1744 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1745 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1746 } 1747 1748 current_space->array += nzi; 1749 current_space->local_used += nzi; 1750 current_space->local_remaining -= nzi; 1751 current_space_lvl->array += nzi; 1752 current_space_lvl->local_used += nzi; 1753 current_space_lvl->local_remaining -= nzi; 1754 } 1755 1756 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1757 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1758 1759 /* destroy list of free space and other temporary arrays */ 1760 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1761 1762 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1763 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1764 1765 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1766 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1767 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1768 1769 #if defined(PETSC_USE_INFO) 1770 { 1771 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1772 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1773 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1774 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1775 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1776 if (diagonal_fill) { 1777 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1778 } 1779 } 1780 #endif 1781 1782 /* put together the new matrix */ 1783 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1784 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1785 b = (Mat_SeqAIJ*)(fact)->data; 1786 b->free_a = PETSC_TRUE; 1787 b->free_ij = PETSC_TRUE; 1788 b->singlemalloc = PETSC_FALSE; 1789 ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1790 b->j = bj; 1791 b->i = bi; 1792 b->diag = bdiag; 1793 b->ilen = 0; 1794 b->imax = 0; 1795 b->row = isrow; 1796 b->col = iscol; 1797 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1798 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1799 b->icol = isicol; 1800 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1801 /* In b structure: Free imax, ilen, old a, old j. 1802 Allocate bdiag, solve_work, new a, new j */ 1803 ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1804 b->maxnz = b->nz = bi[2*n+1] ; 1805 (fact)->info.factor_mallocs = reallocs; 1806 (fact)->info.fill_ratio_given = f; 1807 (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 1808 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1809 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1810 PetscFunctionReturn(0); 1811 } 1812 1813 1814 /* 1815 ilu(0) with natural ordering under revised new data structure. 1816 Factored arrays bj and ba are stored as 1817 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1818 1819 bi=fact->i is an array of size n+1, in which 1820 bi+ 1821 bi[i] -> 1st entry of L(i,:),i=0,...,i-1 1822 bi[n] -> points to L(n-1,:)+1 1823 bi[n+1] -> 1st entry of U(n-1,:) 1824 bdiag=fact->diag is an array of size n+1,in which 1825 bdiag[0] -> points to diagonal of U(n-1,:) 1826 bdiag[n-1] -> points to diagonal of U(0,:) 1827 1828 U(i,:) contains bdiag[i] as its last entry, i.e., 1829 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1830 */ 1831 #undef __FUNCT__ 1832 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2" 1833 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1834 { 1835 1836 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1837 PetscErrorCode ierr; 1838 PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1839 PetscInt i,j,nz,*bi,*bj,*bdiag,bi_temp; 1840 1841 PetscFunctionBegin; 1842 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1843 b = (Mat_SeqAIJ*)(fact)->data; 1844 1845 /* allocate matrix arrays for new data structure */ 1846 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1847 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1848 b->singlemalloc = PETSC_TRUE; 1849 if (!b->diag){ 1850 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1851 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1852 } 1853 bdiag = b->diag; 1854 1855 if (n > 0) { 1856 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1857 } 1858 1859 /* set bi and bj with new data structure */ 1860 bi = b->i; 1861 bj = b->j; 1862 1863 /* L part */ 1864 bi[0] = 0; 1865 for (i=0; i<n; i++){ 1866 nz = adiag[i] - ai[i]; 1867 bi[i+1] = bi[i] + nz; 1868 aj = a->j + ai[i]; 1869 for (j=0; j<nz; j++){ 1870 *bj = aj[j]; bj++; 1871 } 1872 } 1873 1874 /* U part */ 1875 /* bi[n+1] = bi[n]; */ 1876 bi_temp = bi[n]; 1877 for (i=n-1; i>=0; i--){ 1878 nz = ai[i+1] - adiag[i] - 1; 1879 /* bi[2*n-i+1] = bi[2*n-i] + nz + 1; */ 1880 bi_temp = bi_temp + nz + 1; 1881 aj = a->j + adiag[i] + 1; 1882 for (j=0; j<nz; j++){ 1883 *bj = aj[j]; bj++; 1884 } 1885 /* diag[i] */ 1886 *bj = i; bj++; 1887 /* bdiag[i] = bi[2*n-i+1]-1; */ 1888 bdiag[i] = bi_temp - 1; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2" 1895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1896 { 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1898 IS isicol; 1899 PetscErrorCode ierr; 1900 const PetscInt *r,*ic; 1901 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1902 PetscInt *bi,*cols,nnz,*cols_lvl; 1903 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1904 PetscInt i,levels,diagonal_fill; 1905 PetscTruth col_identity,row_identity; 1906 PetscReal f; 1907 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1908 PetscBT lnkbt; 1909 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1910 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1911 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1912 PetscTruth missing; 1913 1914 PetscFunctionBegin; 1915 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1916 f = info->fill; 1917 levels = (PetscInt)info->levels; 1918 diagonal_fill = (PetscInt)info->diagonal_fill; 1919 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1920 1921 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1922 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1923 1924 if (!levels && row_identity && col_identity) { 1925 /* special case: ilu(0) with natural ordering */ 1926 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1927 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct_v2; 1928 1929 fact->factor = MAT_FACTOR_ILU; 1930 (fact)->info.factor_mallocs = 0; 1931 (fact)->info.fill_ratio_given = info->fill; 1932 (fact)->info.fill_ratio_needed = 1.0; 1933 b = (Mat_SeqAIJ*)(fact)->data; 1934 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1935 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1936 b->row = isrow; 1937 b->col = iscol; 1938 b->icol = isicol; 1939 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1940 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1941 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1942 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1943 PetscFunctionReturn(0); 1944 } 1945 1946 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1947 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1948 1949 /* get new row pointers */ 1950 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1951 bi[0] = 0; 1952 /* bdiag is location of diagonal in factor */ 1953 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1954 bdiag[0] = 0; 1955 1956 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1957 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1958 1959 /* create a linked list for storing column indices of the active row */ 1960 nlnk = n + 1; 1961 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1962 1963 /* initial FreeSpace size is f*(ai[n]+1) */ 1964 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1965 current_space = free_space; 1966 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1967 current_space_lvl = free_space_lvl; 1968 1969 for (i=0; i<n; i++) { 1970 nzi = 0; 1971 /* copy current row into linked list */ 1972 nnz = ai[r[i]+1] - ai[r[i]]; 1973 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1974 cols = aj + ai[r[i]]; 1975 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1976 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1977 nzi += nlnk; 1978 1979 /* make sure diagonal entry is included */ 1980 if (diagonal_fill && lnk[i] == -1) { 1981 fm = n; 1982 while (lnk[fm] < i) fm = lnk[fm]; 1983 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1984 lnk[fm] = i; 1985 lnk_lvl[i] = 0; 1986 nzi++; dcount++; 1987 } 1988 1989 /* add pivot rows into the active row */ 1990 nzbd = 0; 1991 prow = lnk[n]; 1992 while (prow < i) { 1993 nnz = bdiag[prow]; 1994 cols = bj_ptr[prow] + nnz + 1; 1995 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1996 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1997 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1998 nzi += nlnk; 1999 prow = lnk[prow]; 2000 nzbd++; 2001 } 2002 bdiag[i] = nzbd; 2003 bi[i+1] = bi[i] + nzi; 2004 2005 /* if free space is not available, make more free space */ 2006 if (current_space->local_remaining<nzi) { 2007 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 2008 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2009 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2010 reallocs++; 2011 } 2012 2013 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2014 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2015 bj_ptr[i] = current_space->array; 2016 bjlvl_ptr[i] = current_space_lvl->array; 2017 2018 /* make sure the active row i has diagonal entry */ 2019 if (*(bj_ptr[i]+bdiag[i]) != i) { 2020 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 2021 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2022 } 2023 2024 current_space->array += nzi; 2025 current_space->local_used += nzi; 2026 current_space->local_remaining -= nzi; 2027 current_space_lvl->array += nzi; 2028 current_space_lvl->local_used += nzi; 2029 current_space_lvl->local_remaining -= nzi; 2030 } 2031 2032 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2033 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2034 2035 /* destroy list of free space and other temporary arrays */ 2036 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2037 2038 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 2039 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 2040 2041 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2042 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2043 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 2044 2045 #if defined(PETSC_USE_INFO) 2046 { 2047 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2048 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2049 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2050 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2051 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2052 if (diagonal_fill) { 2053 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2054 } 2055 } 2056 #endif 2057 2058 /* put together the new matrix */ 2059 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2060 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2061 b = (Mat_SeqAIJ*)(fact)->data; 2062 b->free_a = PETSC_TRUE; 2063 b->free_ij = PETSC_TRUE; 2064 b->singlemalloc = PETSC_FALSE; 2065 ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2066 b->j = bj; 2067 b->i = bi; 2068 b->diag = bdiag; 2069 b->ilen = 0; 2070 b->imax = 0; 2071 b->row = isrow; 2072 b->col = iscol; 2073 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2074 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2075 b->icol = isicol; 2076 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2077 /* In b structure: Free imax, ilen, old a, old j. 2078 Allocate bdiag, solve_work, new a, new j */ 2079 ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2080 b->maxnz = b->nz = bi[2*n+1] ; 2081 (fact)->info.factor_mallocs = reallocs; 2082 (fact)->info.fill_ratio_given = f; 2083 (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 2084 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 2085 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 2086 PetscFunctionReturn(0); 2087 } 2088 2089 #undef __FUNCT__ 2090 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 2091 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 2092 { 2093 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 2094 IS isicol; 2095 PetscErrorCode ierr; 2096 const PetscInt *r,*ic; 2097 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 2098 PetscInt *bi,*cols,nnz,*cols_lvl; 2099 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 2100 PetscInt i,levels,diagonal_fill; 2101 PetscTruth col_identity,row_identity; 2102 PetscReal f; 2103 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 2104 PetscBT lnkbt; 2105 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 2106 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2107 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2108 PetscTruth missing; 2109 PetscTruth newdatastruct=PETSC_FALSE,newdatastruct_v2=PETSC_FALSE; 2110 2111 PetscFunctionBegin; 2112 ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 2113 if (newdatastruct){ 2114 ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2115 PetscFunctionReturn(0); 2116 } 2117 ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr); 2118 if(newdatastruct_v2){ 2119 ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct_v2(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2124 f = info->fill; 2125 levels = (PetscInt)info->levels; 2126 diagonal_fill = (PetscInt)info->diagonal_fill; 2127 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2128 2129 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2130 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 2131 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 2132 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 2133 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 2134 2135 fact->factor = MAT_FACTOR_ILU; 2136 (fact)->info.factor_mallocs = 0; 2137 (fact)->info.fill_ratio_given = info->fill; 2138 (fact)->info.fill_ratio_needed = 1.0; 2139 b = (Mat_SeqAIJ*)(fact)->data; 2140 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2141 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2142 b->row = isrow; 2143 b->col = iscol; 2144 b->icol = isicol; 2145 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2146 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2147 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2148 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2149 PetscFunctionReturn(0); 2150 } 2151 2152 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2153 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2154 2155 /* get new row pointers */ 2156 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 2157 bi[0] = 0; 2158 /* bdiag is location of diagonal in factor */ 2159 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 2160 bdiag[0] = 0; 2161 2162 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 2163 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 2164 2165 /* create a linked list for storing column indices of the active row */ 2166 nlnk = n + 1; 2167 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2168 2169 /* initial FreeSpace size is f*(ai[n]+1) */ 2170 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 2171 current_space = free_space; 2172 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 2173 current_space_lvl = free_space_lvl; 2174 2175 for (i=0; i<n; i++) { 2176 nzi = 0; 2177 /* copy current row into linked list */ 2178 nnz = ai[r[i]+1] - ai[r[i]]; 2179 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 2180 cols = aj + ai[r[i]]; 2181 lnk[i] = -1; /* marker to indicate if diagonal exists */ 2182 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2183 nzi += nlnk; 2184 2185 /* make sure diagonal entry is included */ 2186 if (diagonal_fill && lnk[i] == -1) { 2187 fm = n; 2188 while (lnk[fm] < i) fm = lnk[fm]; 2189 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 2190 lnk[fm] = i; 2191 lnk_lvl[i] = 0; 2192 nzi++; dcount++; 2193 } 2194 2195 /* add pivot rows into the active row */ 2196 nzbd = 0; 2197 prow = lnk[n]; 2198 while (prow < i) { 2199 nnz = bdiag[prow]; 2200 cols = bj_ptr[prow] + nnz + 1; 2201 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 2202 nnz = bi[prow+1] - bi[prow] - nnz - 1; 2203 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 2204 nzi += nlnk; 2205 prow = lnk[prow]; 2206 nzbd++; 2207 } 2208 bdiag[i] = nzbd; 2209 bi[i+1] = bi[i] + nzi; 2210 2211 /* if free space is not available, make more free space */ 2212 if (current_space->local_remaining<nzi) { 2213 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2214 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2215 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2216 reallocs++; 2217 } 2218 2219 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2220 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2221 bj_ptr[i] = current_space->array; 2222 bjlvl_ptr[i] = current_space_lvl->array; 2223 2224 /* make sure the active row i has diagonal entry */ 2225 if (*(bj_ptr[i]+bdiag[i]) != i) { 2226 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 2227 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2228 } 2229 2230 current_space->array += nzi; 2231 current_space->local_used += nzi; 2232 current_space->local_remaining -= nzi; 2233 current_space_lvl->array += nzi; 2234 current_space_lvl->local_used += nzi; 2235 current_space_lvl->local_remaining -= nzi; 2236 } 2237 2238 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2239 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2240 2241 /* destroy list of free space and other temporary arrays */ 2242 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2243 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2244 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2245 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2246 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 2247 2248 #if defined(PETSC_USE_INFO) 2249 { 2250 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2251 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2252 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2253 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2254 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2255 if (diagonal_fill) { 2256 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2257 } 2258 } 2259 #endif 2260 2261 /* put together the new matrix */ 2262 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2263 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2264 b = (Mat_SeqAIJ*)(fact)->data; 2265 b->free_a = PETSC_TRUE; 2266 b->free_ij = PETSC_TRUE; 2267 b->singlemalloc = PETSC_FALSE; 2268 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2269 b->j = bj; 2270 b->i = bi; 2271 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2272 b->diag = bdiag; 2273 b->ilen = 0; 2274 b->imax = 0; 2275 b->row = isrow; 2276 b->col = iscol; 2277 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2278 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2279 b->icol = isicol; 2280 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2281 /* In b structure: Free imax, ilen, old a, old j. 2282 Allocate bdiag, solve_work, new a, new j */ 2283 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2284 b->maxnz = b->nz = bi[n] ; 2285 (fact)->info.factor_mallocs = reallocs; 2286 (fact)->info.fill_ratio_given = f; 2287 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2288 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 2289 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_newdatastruct" 2295 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 2296 { 2297 Mat C = B; 2298 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2299 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2300 IS ip=b->row,iip = b->icol; 2301 PetscErrorCode ierr; 2302 const PetscInt *rip,*riip; 2303 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2304 PetscInt *ai=a->i,*aj=a->j; 2305 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2306 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2307 PetscReal zeropivot,shiftnz; 2308 PetscReal shiftpd; 2309 PetscTruth perm_identity; 2310 2311 PetscFunctionBegin; 2312 2313 shiftnz = info->shiftnz; 2314 shiftpd = info->shiftpd; 2315 zeropivot = info->zeropivot; 2316 2317 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2318 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2319 2320 /* allocate working arrays 2321 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2322 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2323 */ 2324 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 2325 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 2326 c2r = il + mbs; 2327 rtmp = (MatScalar*)(c2r + mbs); 2328 2329 for (i=0; i<mbs; i++) { 2330 c2r[i] = mbs; 2331 } 2332 il[0] = 0; 2333 2334 for (k = 0; k<mbs; k++){ 2335 /* zero rtmp */ 2336 nz = bi[k+1] - bi[k]; 2337 bjtmp = bj + bi[k]; 2338 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2339 2340 /* load in initial unfactored row */ 2341 bval = ba + bi[k]; 2342 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2343 for (j = jmin; j < jmax; j++){ 2344 col = riip[aj[j]]; 2345 if (col >= k){ /* only take upper triangular entry */ 2346 rtmp[col] = aa[j]; 2347 *bval++ = 0.0; /* for in-place factorization */ 2348 } 2349 } 2350 /* shift the diagonal of the matrix */ 2351 2352 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2353 dk = rtmp[k]; 2354 i = c2r[k]; /* first row to be added to k_th row */ 2355 2356 while (i < k){ 2357 nexti = c2r[i]; /* next row to be added to k_th row */ 2358 2359 /* compute multiplier, update diag(k) and U(i,k) */ 2360 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2361 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2362 dk += uikdi*ba[ili]; /* update diag[k] */ 2363 ba[ili] = uikdi; /* -U(i,k) */ 2364 2365 /* add multiple of row i to k-th row */ 2366 jmin = ili + 1; jmax = bi[i+1]; 2367 if (jmin < jmax){ 2368 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2369 /* update il and c2r for row i */ 2370 il[i] = jmin; 2371 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2372 } 2373 i = nexti; 2374 } 2375 2376 /* copy data into U(k,:) */ 2377 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2378 jmin = bi[k]; jmax = bi[k+1]-1; 2379 if (jmin < jmax) { 2380 for (j=jmin; j<jmax; j++){ 2381 col = bj[j]; ba[j] = rtmp[col]; 2382 } 2383 /* add the k-th row into il and c2r */ 2384 il[k] = jmin; 2385 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2386 } 2387 } 2388 2389 ierr = PetscFree(il);CHKERRQ(ierr); 2390 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2391 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2392 2393 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2394 if (perm_identity){ 2395 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_newdatastruct; 2396 (B)->ops->solvetranspose = 0; 2397 (B)->ops->forwardsolve = 0; 2398 (B)->ops->backwardsolve = 0; 2399 } else { 2400 (B)->ops->solve = MatSolve_SeqSBAIJ_1_newdatastruct; 2401 (B)->ops->solvetranspose = 0; 2402 (B)->ops->forwardsolve = 0; 2403 (B)->ops->backwardsolve = 0; 2404 } 2405 2406 C->assembled = PETSC_TRUE; 2407 C->preallocated = PETSC_TRUE; 2408 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2409 PetscFunctionReturn(0); 2410 } 2411 2412 #undef __FUNCT__ 2413 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2414 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2415 { 2416 Mat C = B; 2417 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2418 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2419 IS ip=b->row,iip = b->icol; 2420 PetscErrorCode ierr; 2421 const PetscInt *rip,*riip; 2422 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 2423 PetscInt *ai=a->i,*aj=a->j; 2424 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2425 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2426 PetscReal zeropivot,rs,shiftnz; 2427 PetscReal shiftpd; 2428 ChShift_Ctx sctx; 2429 PetscInt newshift; 2430 PetscTruth perm_identity; 2431 2432 PetscFunctionBegin; 2433 shiftnz = info->shiftnz; 2434 shiftpd = info->shiftpd; 2435 zeropivot = info->zeropivot; 2436 2437 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2438 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2439 2440 /* initialization */ 2441 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 2442 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 2443 jl = il + mbs; 2444 rtmp = (MatScalar*)(jl + mbs); 2445 2446 sctx.shift_amount = 0; 2447 sctx.nshift = 0; 2448 do { 2449 sctx.chshift = PETSC_FALSE; 2450 for (i=0; i<mbs; i++) { 2451 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 2452 } 2453 2454 for (k = 0; k<mbs; k++){ 2455 bval = ba + bi[k]; 2456 /* initialize k-th row by the perm[k]-th row of A */ 2457 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2458 for (j = jmin; j < jmax; j++){ 2459 col = riip[aj[j]]; 2460 if (col >= k){ /* only take upper triangular entry */ 2461 rtmp[col] = aa[j]; 2462 *bval++ = 0.0; /* for in-place factorization */ 2463 } 2464 } 2465 /* shift the diagonal of the matrix */ 2466 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2467 2468 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2469 dk = rtmp[k]; 2470 i = jl[k]; /* first row to be added to k_th row */ 2471 2472 while (i < k){ 2473 nexti = jl[i]; /* next row to be added to k_th row */ 2474 2475 /* compute multiplier, update diag(k) and U(i,k) */ 2476 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2477 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2478 dk += uikdi*ba[ili]; 2479 ba[ili] = uikdi; /* -U(i,k) */ 2480 2481 /* add multiple of row i to k-th row */ 2482 jmin = ili + 1; jmax = bi[i+1]; 2483 if (jmin < jmax){ 2484 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2485 /* update il and jl for row i */ 2486 il[i] = jmin; 2487 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2488 } 2489 i = nexti; 2490 } 2491 2492 /* shift the diagonals when zero pivot is detected */ 2493 /* compute rs=sum of abs(off-diagonal) */ 2494 rs = 0.0; 2495 jmin = bi[k]+1; 2496 nz = bi[k+1] - jmin; 2497 bcol = bj + jmin; 2498 for (j=0; j<nz; j++) { 2499 rs += PetscAbsScalar(rtmp[bcol[j]]); 2500 } 2501 2502 sctx.rs = rs; 2503 sctx.pv = dk; 2504 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 2505 2506 if (newshift == 1) { 2507 if (!sctx.shift_amount) { 2508 sctx.shift_amount = 1e-5; 2509 } 2510 break; 2511 } 2512 2513 /* copy data into U(k,:) */ 2514 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2515 jmin = bi[k]+1; jmax = bi[k+1]; 2516 if (jmin < jmax) { 2517 for (j=jmin; j<jmax; j++){ 2518 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 2519 } 2520 /* add the k-th row into il and jl */ 2521 il[k] = jmin; 2522 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2523 } 2524 } 2525 } while (sctx.chshift); 2526 ierr = PetscFree(il);CHKERRQ(ierr); 2527 2528 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2529 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2530 2531 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2532 if (perm_identity){ 2533 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2534 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2535 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2536 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2537 } else { 2538 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 2539 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2540 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2541 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2542 } 2543 2544 C->assembled = PETSC_TRUE; 2545 C->preallocated = PETSC_TRUE; 2546 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2547 if (sctx.nshift){ 2548 if (shiftnz) { 2549 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2550 } else if (shiftpd) { 2551 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2552 } 2553 } 2554 PetscFunctionReturn(0); 2555 } 2556 2557 #undef __FUNCT__ 2558 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_newdatastruct" 2559 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2560 { 2561 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2562 Mat_SeqSBAIJ *b; 2563 PetscErrorCode ierr; 2564 PetscTruth perm_identity,missing; 2565 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2566 const PetscInt *rip,*riip; 2567 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2568 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2569 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2570 PetscReal fill=info->fill,levels=info->levels; 2571 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2572 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2573 PetscBT lnkbt; 2574 IS iperm; 2575 2576 PetscFunctionBegin; 2577 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2578 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2579 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2580 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2581 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2582 2583 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2584 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2585 ui[0] = 0; 2586 2587 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2588 if (!levels && perm_identity) { 2589 2590 for (i=0; i<am; i++) { 2591 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2592 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2593 } 2594 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2595 cols = uj; 2596 for (i=0; i<am; i++) { 2597 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2598 ncols = ui[i+1] - ui[i] - 1; 2599 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2600 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2601 } 2602 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2603 2604 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2605 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2606 2607 /* initialization */ 2608 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2609 2610 /* jl: linked list for storing indices of the pivot rows 2611 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2612 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 2613 il = jl + am; 2614 uj_ptr = (PetscInt**)(il + am); 2615 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2616 for (i=0; i<am; i++){ 2617 jl[i] = am; il[i] = 0; 2618 } 2619 2620 /* create and initialize a linked list for storing column indices of the active row k */ 2621 nlnk = am + 1; 2622 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2623 2624 /* initial FreeSpace size is fill*(ai[am]+1) */ 2625 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2626 current_space = free_space; 2627 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2628 current_space_lvl = free_space_lvl; 2629 2630 for (k=0; k<am; k++){ /* for each active row k */ 2631 /* initialize lnk by the column indices of row rip[k] of A */ 2632 nzk = 0; 2633 ncols = ai[rip[k]+1] - ai[rip[k]]; 2634 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2635 ncols_upper = 0; 2636 for (j=0; j<ncols; j++){ 2637 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2638 if (riip[i] >= k){ /* only take upper triangular entry */ 2639 ajtmp[ncols_upper] = i; 2640 ncols_upper++; 2641 } 2642 } 2643 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2644 nzk += nlnk; 2645 2646 /* update lnk by computing fill-in for each pivot row to be merged in */ 2647 prow = jl[k]; /* 1st pivot row */ 2648 2649 while (prow < k){ 2650 nextprow = jl[prow]; 2651 2652 /* merge prow into k-th row */ 2653 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2654 jmax = ui[prow+1]; 2655 ncols = jmax-jmin; 2656 i = jmin - ui[prow]; 2657 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2658 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2659 j = *(uj - 1); 2660 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2661 nzk += nlnk; 2662 2663 /* update il and jl for prow */ 2664 if (jmin < jmax){ 2665 il[prow] = jmin; 2666 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2667 } 2668 prow = nextprow; 2669 } 2670 2671 /* if free space is not available, make more free space */ 2672 if (current_space->local_remaining<nzk) { 2673 i = am - k + 1; /* num of unfactored rows */ 2674 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2675 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2676 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2677 reallocs++; 2678 } 2679 2680 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2681 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2682 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2683 2684 /* add the k-th row into il and jl */ 2685 if (nzk > 1){ 2686 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2687 jl[k] = jl[i]; jl[i] = k; 2688 il[k] = ui[k] + 1; 2689 } 2690 uj_ptr[k] = current_space->array; 2691 uj_lvl_ptr[k] = current_space_lvl->array; 2692 2693 current_space->array += nzk; 2694 current_space->local_used += nzk; 2695 current_space->local_remaining -= nzk; 2696 2697 current_space_lvl->array += nzk; 2698 current_space_lvl->local_used += nzk; 2699 current_space_lvl->local_remaining -= nzk; 2700 2701 ui[k+1] = ui[k] + nzk; 2702 } 2703 2704 #if defined(PETSC_USE_INFO) 2705 if (ai[am] != 0) { 2706 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2707 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2708 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2709 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2710 } else { 2711 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2712 } 2713 #endif 2714 2715 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2716 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2717 ierr = PetscFree(jl);CHKERRQ(ierr); 2718 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2719 2720 /* destroy list of free space and other temporary array(s) */ 2721 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2722 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); 2723 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2724 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2725 2726 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2727 2728 /* put together the new matrix in MATSEQSBAIJ format */ 2729 2730 b = (Mat_SeqSBAIJ*)(fact)->data; 2731 b->singlemalloc = PETSC_FALSE; 2732 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2733 b->j = uj; 2734 b->i = ui; 2735 b->diag = udiag; 2736 b->free_diag = PETSC_TRUE; 2737 b->ilen = 0; 2738 b->imax = 0; 2739 b->row = perm; 2740 b->col = perm; 2741 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2742 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2743 b->icol = iperm; 2744 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2745 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2746 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2747 b->maxnz = b->nz = ui[am]; 2748 b->free_a = PETSC_TRUE; 2749 b->free_ij = PETSC_TRUE; 2750 2751 (fact)->info.factor_mallocs = reallocs; 2752 (fact)->info.fill_ratio_given = fill; 2753 if (ai[am] != 0) { 2754 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2755 } else { 2756 (fact)->info.fill_ratio_needed = 0.0; 2757 } 2758 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_newdatastruct; 2759 PetscFunctionReturn(0); 2760 } 2761 2762 #undef __FUNCT__ 2763 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2764 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2765 { 2766 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2767 Mat_SeqSBAIJ *b; 2768 PetscErrorCode ierr; 2769 PetscTruth perm_identity,missing; 2770 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2771 const PetscInt *rip,*riip; 2772 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2773 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2774 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2775 PetscReal fill=info->fill,levels=info->levels; 2776 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2777 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2778 PetscBT lnkbt; 2779 IS iperm; 2780 PetscTruth newdatastruct=PETSC_FALSE; 2781 2782 PetscFunctionBegin; 2783 ierr = PetscOptionsGetTruth(PETSC_NULL,"-icc_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 2784 if(newdatastruct){ 2785 ierr = MatICCFactorSymbolic_SeqAIJ_newdatastruct(fact,A,perm,info);CHKERRQ(ierr); 2786 PetscFunctionReturn(0); 2787 } 2788 2789 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2790 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2791 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2792 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2793 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2794 2795 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2796 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2797 ui[0] = 0; 2798 2799 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2800 if (!levels && perm_identity) { 2801 2802 for (i=0; i<am; i++) { 2803 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2804 udiag[i] = ui[i]; 2805 } 2806 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2807 cols = uj; 2808 for (i=0; i<am; i++) { 2809 aj = a->j + a->diag[i]; 2810 ncols = ui[i+1] - ui[i]; 2811 for (j=0; j<ncols; j++) *cols++ = *aj++; 2812 } 2813 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2814 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2815 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2816 2817 /* initialization */ 2818 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2819 2820 /* jl: linked list for storing indices of the pivot rows 2821 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2822 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 2823 il = jl + am; 2824 uj_ptr = (PetscInt**)(il + am); 2825 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 2826 for (i=0; i<am; i++){ 2827 jl[i] = am; il[i] = 0; 2828 } 2829 2830 /* create and initialize a linked list for storing column indices of the active row k */ 2831 nlnk = am + 1; 2832 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2833 2834 /* initial FreeSpace size is fill*(ai[am]+1) */ 2835 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2836 current_space = free_space; 2837 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2838 current_space_lvl = free_space_lvl; 2839 2840 for (k=0; k<am; k++){ /* for each active row k */ 2841 /* initialize lnk by the column indices of row rip[k] of A */ 2842 nzk = 0; 2843 ncols = ai[rip[k]+1] - ai[rip[k]]; 2844 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2845 ncols_upper = 0; 2846 for (j=0; j<ncols; j++){ 2847 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2848 if (riip[i] >= k){ /* only take upper triangular entry */ 2849 ajtmp[ncols_upper] = i; 2850 ncols_upper++; 2851 } 2852 } 2853 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2854 nzk += nlnk; 2855 2856 /* update lnk by computing fill-in for each pivot row to be merged in */ 2857 prow = jl[k]; /* 1st pivot row */ 2858 2859 while (prow < k){ 2860 nextprow = jl[prow]; 2861 2862 /* merge prow into k-th row */ 2863 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2864 jmax = ui[prow+1]; 2865 ncols = jmax-jmin; 2866 i = jmin - ui[prow]; 2867 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2868 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2869 j = *(uj - 1); 2870 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2871 nzk += nlnk; 2872 2873 /* update il and jl for prow */ 2874 if (jmin < jmax){ 2875 il[prow] = jmin; 2876 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2877 } 2878 prow = nextprow; 2879 } 2880 2881 /* if free space is not available, make more free space */ 2882 if (current_space->local_remaining<nzk) { 2883 i = am - k + 1; /* num of unfactored rows */ 2884 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2885 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2886 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2887 reallocs++; 2888 } 2889 2890 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2891 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2892 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2893 2894 /* add the k-th row into il and jl */ 2895 if (nzk > 1){ 2896 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2897 jl[k] = jl[i]; jl[i] = k; 2898 il[k] = ui[k] + 1; 2899 } 2900 uj_ptr[k] = current_space->array; 2901 uj_lvl_ptr[k] = current_space_lvl->array; 2902 2903 current_space->array += nzk; 2904 current_space->local_used += nzk; 2905 current_space->local_remaining -= nzk; 2906 2907 current_space_lvl->array += nzk; 2908 current_space_lvl->local_used += nzk; 2909 current_space_lvl->local_remaining -= nzk; 2910 2911 ui[k+1] = ui[k] + nzk; 2912 } 2913 2914 #if defined(PETSC_USE_INFO) 2915 if (ai[am] != 0) { 2916 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2917 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2918 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2919 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2920 } else { 2921 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2922 } 2923 #endif 2924 2925 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2926 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2927 ierr = PetscFree(jl);CHKERRQ(ierr); 2928 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2929 2930 /* destroy list of free space and other temporary array(s) */ 2931 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2932 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2933 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2934 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2935 2936 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2937 2938 /* put together the new matrix in MATSEQSBAIJ format */ 2939 2940 b = (Mat_SeqSBAIJ*)(fact)->data; 2941 b->singlemalloc = PETSC_FALSE; 2942 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2943 b->j = uj; 2944 b->i = ui; 2945 b->diag = udiag; 2946 b->free_diag = PETSC_TRUE; 2947 b->ilen = 0; 2948 b->imax = 0; 2949 b->row = perm; 2950 b->col = perm; 2951 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2952 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2953 b->icol = iperm; 2954 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2955 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2956 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2957 b->maxnz = b->nz = ui[am]; 2958 b->free_a = PETSC_TRUE; 2959 b->free_ij = PETSC_TRUE; 2960 2961 (fact)->info.factor_mallocs = reallocs; 2962 (fact)->info.fill_ratio_given = fill; 2963 if (ai[am] != 0) { 2964 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2965 } else { 2966 (fact)->info.fill_ratio_needed = 0.0; 2967 } 2968 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2974 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2975 { 2976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2977 Mat_SeqSBAIJ *b; 2978 PetscErrorCode ierr; 2979 PetscTruth perm_identity; 2980 PetscReal fill = info->fill; 2981 const PetscInt *rip,*riip; 2982 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2983 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2984 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2985 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2986 PetscBT lnkbt; 2987 IS iperm; 2988 2989 PetscFunctionBegin; 2990 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2991 /* check whether perm is the identity mapping */ 2992 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2993 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2994 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2995 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2996 2997 /* initialization */ 2998 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2999 ui[0] = 0; 3000 3001 /* jl: linked list for storing indices of the pivot rows 3002 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3003 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 3004 il = jl + am; 3005 cols = il + am; 3006 ui_ptr = (PetscInt**)(cols + am); 3007 for (i=0; i<am; i++){ 3008 jl[i] = am; il[i] = 0; 3009 } 3010 3011 /* create and initialize a linked list for storing column indices of the active row k */ 3012 nlnk = am + 1; 3013 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3014 3015 /* initial FreeSpace size is fill*(ai[am]+1) */ 3016 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3017 current_space = free_space; 3018 3019 for (k=0; k<am; k++){ /* for each active row k */ 3020 /* initialize lnk by the column indices of row rip[k] of A */ 3021 nzk = 0; 3022 ncols = ai[rip[k]+1] - ai[rip[k]]; 3023 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3024 ncols_upper = 0; 3025 for (j=0; j<ncols; j++){ 3026 i = riip[*(aj + ai[rip[k]] + j)]; 3027 if (i >= k){ /* only take upper triangular entry */ 3028 cols[ncols_upper] = i; 3029 ncols_upper++; 3030 } 3031 } 3032 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3033 nzk += nlnk; 3034 3035 /* update lnk by computing fill-in for each pivot row to be merged in */ 3036 prow = jl[k]; /* 1st pivot row */ 3037 3038 while (prow < k){ 3039 nextprow = jl[prow]; 3040 /* merge prow into k-th row */ 3041 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3042 jmax = ui[prow+1]; 3043 ncols = jmax-jmin; 3044 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3045 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3046 nzk += nlnk; 3047 3048 /* update il and jl for prow */ 3049 if (jmin < jmax){ 3050 il[prow] = jmin; 3051 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3052 } 3053 prow = nextprow; 3054 } 3055 3056 /* if free space is not available, make more free space */ 3057 if (current_space->local_remaining<nzk) { 3058 i = am - k + 1; /* num of unfactored rows */ 3059 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3060 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3061 reallocs++; 3062 } 3063 3064 /* copy data into free space, then initialize lnk */ 3065 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3066 3067 /* add the k-th row into il and jl */ 3068 if (nzk-1 > 0){ 3069 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3070 jl[k] = jl[i]; jl[i] = k; 3071 il[k] = ui[k] + 1; 3072 } 3073 ui_ptr[k] = current_space->array; 3074 current_space->array += nzk; 3075 current_space->local_used += nzk; 3076 current_space->local_remaining -= nzk; 3077 3078 ui[k+1] = ui[k] + nzk; 3079 } 3080 3081 #if defined(PETSC_USE_INFO) 3082 if (ai[am] != 0) { 3083 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3084 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3085 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3086 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3087 } else { 3088 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3089 } 3090 #endif 3091 3092 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3093 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3094 ierr = PetscFree(jl);CHKERRQ(ierr); 3095 3096 /* destroy list of free space and other temporary array(s) */ 3097 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3098 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3099 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3100 3101 /* put together the new matrix in MATSEQSBAIJ format */ 3102 3103 b = (Mat_SeqSBAIJ*)(fact)->data; 3104 b->singlemalloc = PETSC_FALSE; 3105 b->free_a = PETSC_TRUE; 3106 b->free_ij = PETSC_TRUE; 3107 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3108 b->j = uj; 3109 b->i = ui; 3110 b->diag = 0; 3111 b->ilen = 0; 3112 b->imax = 0; 3113 b->row = perm; 3114 b->col = perm; 3115 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3116 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3117 b->icol = iperm; 3118 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3119 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3120 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3121 b->maxnz = b->nz = ui[am]; 3122 3123 (fact)->info.factor_mallocs = reallocs; 3124 (fact)->info.fill_ratio_given = fill; 3125 if (ai[am] != 0) { 3126 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3127 } else { 3128 (fact)->info.fill_ratio_needed = 0.0; 3129 } 3130 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 3131 PetscFunctionReturn(0); 3132 } 3133 3134 #undef __FUNCT__ 3135 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct" 3136 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 3137 { 3138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3139 PetscErrorCode ierr; 3140 PetscInt n = A->rmap->n; 3141 const PetscInt *ai = a->i,*aj = a->j,*vi; 3142 PetscScalar *x,sum; 3143 const PetscScalar *b; 3144 const MatScalar *aa = a->a,*v; 3145 PetscInt i,nz; 3146 3147 PetscFunctionBegin; 3148 if (!n) PetscFunctionReturn(0); 3149 3150 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3151 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3152 3153 /* forward solve the lower triangular */ 3154 x[0] = b[0]; 3155 v = aa; 3156 vi = aj; 3157 for (i=1; i<n; i++) { 3158 nz = ai[i+1] - ai[i]; 3159 sum = b[i]; 3160 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3161 v += nz; 3162 vi += nz; 3163 x[i] = sum; 3164 } 3165 3166 /* backward solve the upper triangular */ 3167 v = aa + ai[n+1]; 3168 vi = aj + ai[n+1]; 3169 for (i=n-1; i>=0; i--){ 3170 nz = ai[2*n-i +1] - ai[2*n-i]-1; 3171 sum = x[i]; 3172 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3173 v += nz; 3174 vi += nz; vi++; 3175 x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */ 3176 } 3177 3178 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3179 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3180 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3181 PetscFunctionReturn(0); 3182 } 3183 3184 #undef __FUNCT__ 3185 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2" 3186 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct_v2(Mat A,Vec bb,Vec xx) 3187 { 3188 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3189 PetscErrorCode ierr; 3190 PetscInt n = A->rmap->n; 3191 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3192 PetscScalar *x,sum; 3193 const PetscScalar *b; 3194 const MatScalar *aa = a->a,*v; 3195 PetscInt i,nz; 3196 3197 PetscFunctionBegin; 3198 if (!n) PetscFunctionReturn(0); 3199 3200 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3201 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3202 3203 /* forward solve the lower triangular */ 3204 x[0] = b[0]; 3205 v = aa; 3206 vi = aj; 3207 for (i=1; i<n; i++) { 3208 nz = ai[i+1] - ai[i]; 3209 sum = b[i]; 3210 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3211 v += nz; 3212 vi += nz; 3213 x[i] = sum; 3214 } 3215 3216 /* backward solve the upper triangular */ 3217 /* v = aa + ai[n+1]; 3218 vi = aj + ai[n+1]; */ 3219 v = aa + adiag[n-1]; 3220 vi = aj + adiag[n-1]; 3221 x[n-1] = *v++ *x[n-1]; 3222 vi++; 3223 for (i=n-2; i>=0; i--){ 3224 nz = adiag[i] - adiag[i+1]-1; 3225 sum = x[i]; 3226 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3227 v += nz; 3228 vi += nz; vi++; 3229 x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */ 3230 } 3231 3232 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3233 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3234 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3235 PetscFunctionReturn(0); 3236 } 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatSolve_SeqAIJ_newdatastruct" 3240 PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx) 3241 { 3242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3243 IS iscol = a->col,isrow = a->row; 3244 PetscErrorCode ierr; 3245 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,nz,k; 3246 const PetscInt *rout,*cout,*r,*c; 3247 PetscScalar *x,*tmp,*tmps,sum; 3248 const PetscScalar *b; 3249 const MatScalar *aa = a->a,*v; 3250 3251 PetscFunctionBegin; 3252 if (!n) PetscFunctionReturn(0); 3253 3254 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3255 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3256 tmp = a->solve_work; 3257 3258 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3259 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3260 3261 /* forward solve the lower triangular */ 3262 tmp[0] = b[*r++]; 3263 tmps = tmp; 3264 v = aa; 3265 vi = aj; 3266 for (i=1; i<n; i++) { 3267 nz = ai[i+1] - ai[i]; 3268 sum = b[*r++]; 3269 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 3270 tmp[i] = sum; 3271 v += nz; vi += nz; 3272 } 3273 3274 /* backward solve the upper triangular */ 3275 k = n+1; 3276 v = aa + ai[k]; /* 1st entry of U(n-1,:) */ 3277 vi = aj + ai[k]; 3278 for (i=n-1; i>=0; i--){ 3279 k = 2*n-i; 3280 nz = ai[k +1] - ai[k] - 1; 3281 sum = tmp[i]; 3282 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 3283 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3284 v += nz+1; vi += nz+1; 3285 } 3286 3287 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3288 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3289 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 3290 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3291 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3292 PetscFunctionReturn(0); 3293 } 3294 3295 #undef __FUNCT__ 3296 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3297 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3298 { 3299 Mat B = *fact; 3300 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3301 IS isicol; 3302 PetscErrorCode ierr; 3303 const PetscInt *r,*ic; 3304 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3305 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3306 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3307 PetscInt nlnk,*lnk; 3308 PetscBT lnkbt; 3309 PetscTruth row_identity,icol_identity,both_identity; 3310 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3311 const PetscInt *ics; 3312 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3313 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftinblocks; 3314 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3315 PetscTruth missing; 3316 3317 PetscFunctionBegin; 3318 3319 if (dt == PETSC_DEFAULT) dt = 0.005; 3320 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3321 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3322 3323 /* ------- symbolic factorization, can be reused ---------*/ 3324 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3325 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3326 adiag=a->diag; 3327 3328 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3329 3330 /* bdiag is location of diagonal in factor */ 3331 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 3332 bdiag_rev = bdiag + n+1; 3333 3334 /* allocate row pointers bi */ 3335 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3336 3337 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3338 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3339 nnz_max = ai[n]+2*n*dtcount+2; 3340 3341 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3342 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3343 3344 /* put together the new matrix */ 3345 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3346 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3347 b = (Mat_SeqAIJ*)(B)->data; 3348 b->free_a = PETSC_TRUE; 3349 b->free_ij = PETSC_TRUE; 3350 b->singlemalloc = PETSC_FALSE; 3351 b->a = ba; 3352 b->j = bj; 3353 b->i = bi; 3354 b->diag = bdiag; 3355 b->ilen = 0; 3356 b->imax = 0; 3357 b->row = isrow; 3358 b->col = iscol; 3359 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3360 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3361 b->icol = isicol; 3362 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3363 3364 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3365 b->maxnz = nnz_max; 3366 3367 (B)->factor = MAT_FACTOR_ILUDT; 3368 (B)->info.factor_mallocs = 0; 3369 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3370 CHKMEMQ; 3371 /* ------- end of symbolic factorization ---------*/ 3372 3373 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3374 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3375 ics = ic; 3376 3377 /* linked list for storing column indices of the active row */ 3378 nlnk = n + 1; 3379 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3380 3381 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3382 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 3383 jtmp = im + n; 3384 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3385 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3386 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 3387 vtmp = rtmp + n; 3388 3389 bi[0] = 0; 3390 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3391 bdiag_rev[n] = bdiag[0]; 3392 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3393 for (i=0; i<n; i++) { 3394 /* copy initial fill into linked list */ 3395 nzi = 0; /* nonzeros for active row i */ 3396 nzi = ai[r[i]+1] - ai[r[i]]; 3397 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3398 nzi_al = adiag[r[i]] - ai[r[i]]; 3399 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3400 ajtmp = aj + ai[r[i]]; 3401 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3402 3403 /* load in initial (unfactored row) */ 3404 aatmp = a->a + ai[r[i]]; 3405 for (j=0; j<nzi; j++) { 3406 rtmp[ics[*ajtmp++]] = *aatmp++; 3407 } 3408 3409 /* add pivot rows into linked list */ 3410 row = lnk[n]; 3411 while (row < i ) { 3412 nzi_bl = bi[row+1] - bi[row] + 1; 3413 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3414 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3415 nzi += nlnk; 3416 row = lnk[row]; 3417 } 3418 3419 /* copy data from lnk into jtmp, then initialize lnk */ 3420 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3421 3422 /* numerical factorization */ 3423 bjtmp = jtmp; 3424 row = *bjtmp++; /* 1st pivot row */ 3425 while ( row < i ) { 3426 pc = rtmp + row; 3427 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3428 multiplier = (*pc) * (*pv); 3429 *pc = multiplier; 3430 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3431 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3432 pv = ba + bdiag[row+1] + 1; 3433 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3434 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3435 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3436 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3437 } 3438 row = *bjtmp++; 3439 } 3440 3441 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3442 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3443 nzi_bl = 0; j = 0; 3444 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3445 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3446 nzi_bl++; j++; 3447 } 3448 nzi_bu = nzi - nzi_bl -1; 3449 while (j < nzi){ 3450 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3451 j++; 3452 } 3453 3454 bjtmp = bj + bi[i]; 3455 batmp = ba + bi[i]; 3456 /* apply level dropping rule to L part */ 3457 ncut = nzi_al + dtcount; 3458 if (ncut < nzi_bl){ 3459 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3460 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3461 } else { 3462 ncut = nzi_bl; 3463 } 3464 for (j=0; j<ncut; j++){ 3465 bjtmp[j] = jtmp[j]; 3466 batmp[j] = vtmp[j]; 3467 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3468 } 3469 bi[i+1] = bi[i] + ncut; 3470 nzi = ncut + 1; 3471 3472 /* apply level dropping rule to U part */ 3473 ncut = nzi_au + dtcount; 3474 if (ncut < nzi_bu){ 3475 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3476 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3477 } else { 3478 ncut = nzi_bu; 3479 } 3480 nzi += ncut; 3481 3482 /* mark bdiagonal */ 3483 bdiag[i+1] = bdiag[i] - (ncut + 1); 3484 bdiag_rev[n-i-1] = bdiag[i+1]; 3485 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3486 bjtmp = bj + bdiag[i]; 3487 batmp = ba + bdiag[i]; 3488 *bjtmp = i; 3489 *batmp = diag_tmp; /* rtmp[i]; */ 3490 if (*batmp == 0.0) { 3491 *batmp = dt+shift; 3492 /* printf(" row %d add shift %g\n",i,shift); */ 3493 } 3494 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3495 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3496 3497 bjtmp = bj + bdiag[i+1]+1; 3498 batmp = ba + bdiag[i+1]+1; 3499 for (k=0; k<ncut; k++){ 3500 bjtmp[k] = jtmp[nzi_bl+1+k]; 3501 batmp[k] = vtmp[nzi_bl+1+k]; 3502 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3503 } 3504 /* printf("\n"); */ 3505 3506 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3507 /* 3508 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3509 printf(" ----------------------------\n"); 3510 */ 3511 } /* for (i=0; i<n; i++) */ 3512 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3513 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3514 3515 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3516 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3517 3518 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3519 ierr = PetscFree(im);CHKERRQ(ierr); 3520 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3521 3522 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3523 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3524 3525 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3526 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3527 both_identity = (PetscTruth) (row_identity && icol_identity); 3528 if (row_identity && icol_identity) { 3529 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 3530 } else { 3531 B->ops->solve = MatSolve_SeqAIJ_newdatastruct; 3532 } 3533 3534 B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 3535 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 3536 B->ops->solveadd = 0; 3537 B->ops->solvetranspose = 0; 3538 B->ops->solvetransposeadd = 0; 3539 B->ops->matsolve = 0; 3540 B->assembled = PETSC_TRUE; 3541 B->preallocated = PETSC_TRUE; 3542 PetscFunctionReturn(0); 3543 } 3544 3545 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3546 #undef __FUNCT__ 3547 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3548 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3549 { 3550 PetscErrorCode ierr; 3551 3552 PetscFunctionBegin; 3553 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3554 3555 fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 3556 PetscFunctionReturn(0); 3557 } 3558 3559 /* 3560 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3561 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3562 */ 3563 #undef __FUNCT__ 3564 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3565 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3566 { 3567 Mat C=fact; 3568 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3569 IS isrow = b->row,isicol = b->icol; 3570 PetscErrorCode ierr; 3571 const PetscInt *r,*ic,*ics; 3572 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3573 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3574 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3575 PetscReal dt=info->dt,shift=info->shiftinblocks; 3576 PetscTruth row_identity, col_identity; 3577 3578 PetscFunctionBegin; 3579 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3580 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3581 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3582 ics = ic; 3583 3584 for (i=0; i<n; i++){ 3585 /* initialize rtmp array */ 3586 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3587 bjtmp = bj + bi[i]; 3588 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3589 rtmp[i] = 0.0; 3590 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3591 bjtmp = bj + bdiag[i+1] + 1; 3592 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3593 3594 /* load in initial unfactored row of A */ 3595 /* printf("row %d\n",i); */ 3596 nz = ai[r[i]+1] - ai[r[i]]; 3597 ajtmp = aj + ai[r[i]]; 3598 v = aa + ai[r[i]]; 3599 for (j=0; j<nz; j++) { 3600 rtmp[ics[*ajtmp++]] = v[j]; 3601 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3602 } 3603 /* printf("\n"); */ 3604 3605 /* numerical factorization */ 3606 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3607 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3608 k = 0; 3609 while (k < nzl){ 3610 row = *bjtmp++; 3611 /* printf(" prow %d\n",row); */ 3612 pc = rtmp + row; 3613 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3614 multiplier = (*pc) * (*pv); 3615 *pc = multiplier; 3616 if (PetscAbsScalar(multiplier) > dt){ 3617 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3618 pv = b->a + bdiag[row+1] + 1; 3619 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3620 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3621 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 3622 } 3623 k++; 3624 } 3625 3626 /* finished row so stick it into b->a */ 3627 /* L-part */ 3628 pv = b->a + bi[i] ; 3629 pj = bj + bi[i] ; 3630 nzl = bi[i+1] - bi[i]; 3631 for (j=0; j<nzl; j++) { 3632 pv[j] = rtmp[pj[j]]; 3633 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3634 } 3635 3636 /* diagonal: invert diagonal entries for simplier triangular solves */ 3637 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3638 b->a[bdiag[i]] = 1.0/rtmp[i]; 3639 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3640 3641 /* U-part */ 3642 pv = b->a + bdiag[i+1] + 1; 3643 pj = bj + bdiag[i+1] + 1; 3644 nzu = bdiag[i] - bdiag[i+1] - 1; 3645 for (j=0; j<nzu; j++) { 3646 pv[j] = rtmp[pj[j]]; 3647 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3648 } 3649 /* printf("\n"); */ 3650 } 3651 3652 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3653 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3654 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3655 3656 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3657 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3658 if (row_identity && col_identity) { 3659 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 3660 } else { 3661 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 3662 } 3663 C->ops->solveadd = 0; 3664 C->ops->solvetranspose = 0; 3665 C->ops->solvetransposeadd = 0; 3666 C->ops->matsolve = 0; 3667 C->assembled = PETSC_TRUE; 3668 C->preallocated = PETSC_TRUE; 3669 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3670 PetscFunctionReturn(0); 3671 } 3672