1 #define PETSCMAT_DLL 2 3 4 #include "../src/mat/impls/aij/seq/aij.h" 5 #include "petscbt.h" 6 #include "../src/mat/utils/freespace.h" 7 8 EXTERN_C_BEGIN 9 #undef __FUNCT__ 10 #define __FUNCT__ "MatOrdering_Flow_SeqAIJ" 11 /* 12 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 13 */ 14 PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 15 { 16 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 17 PetscErrorCode ierr; 18 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 19 const PetscInt *ai = a->i, *aj = a->j; 20 const PetscScalar *aa = a->a; 21 PetscTruth *done; 22 PetscReal best,past = 0,future; 23 24 PetscFunctionBegin; 25 /* pick initial row */ 26 best = -1; 27 for (i=0; i<n; i++) { 28 future = 0; 29 for (j=ai[i]; j<ai[i+1]; j++) { 30 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 31 } 32 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 33 if (past/future > best) { 34 best = past/future; 35 current = i; 36 } 37 } 38 39 ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr); 40 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr); 42 order[0] = current; 43 for (i=0; i<n-1; i++) { 44 done[current] = PETSC_TRUE; 45 best = -1; 46 /* loop over all neighbors of current pivot */ 47 for (j=ai[current]; j<ai[current+1]; j++) { 48 jj = aj[j]; 49 if (done[jj]) continue; 50 /* loop over columns of potential next row computing weights for below and above diagonal */ 51 past = future = 0.0; 52 for (k=ai[jj]; k<ai[jj+1]; k++) { 53 kk = aj[k]; 54 if (done[kk]) past += PetscAbsScalar(aa[k]); 55 else if (kk != jj) future += PetscAbsScalar(aa[k]); 56 } 57 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 58 if (past/future > best) { 59 best = past/future; 60 newcurrent = jj; 61 } 62 } 63 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 64 best = -1; 65 for (k=0; k<n; k++) { 66 if (done[k]) continue; 67 future = 0; 68 past = 0; 69 for (j=ai[k]; j<ai[k+1]; j++) { 70 kk = aj[j]; 71 if (done[kk]) past += PetscAbsScalar(aa[j]); 72 else if (kk != k) future += PetscAbsScalar(aa[j]); 73 } 74 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 75 if (past/future > best) { 76 best = past/future; 77 newcurrent = k; 78 } 79 } 80 } 81 if (current == newcurrent) SETERRQ(PETSC_ERR_PLIB,"newcurrent cannot be current"); 82 current = newcurrent; 83 order[i+1] = current; 84 } 85 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr); 86 *icol = *irow; 87 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 88 ierr = PetscFree(done);CHKERRQ(ierr); 89 ierr = PetscFree(order);CHKERRQ(ierr); 90 PetscFunctionReturn(0); 91 } 92 EXTERN_C_END 93 94 EXTERN_C_BEGIN 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 97 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 98 { 99 PetscFunctionBegin; 100 *flg = PETSC_TRUE; 101 PetscFunctionReturn(0); 102 } 103 EXTERN_C_END 104 105 EXTERN_C_BEGIN 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 108 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 109 { 110 PetscInt n = A->rmap->n; 111 PetscErrorCode ierr; 112 113 PetscFunctionBegin; 114 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 115 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 116 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 117 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 118 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 119 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 120 (*B)->ops->iludtfactor = MatILUDTFactor_SeqAIJ; 121 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 122 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 123 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 124 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 125 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 126 } else SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 127 (*B)->factor = ftype; 128 PetscFunctionReturn(0); 129 } 130 EXTERN_C_END 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 134 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 135 { 136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 137 IS isicol; 138 PetscErrorCode ierr; 139 const PetscInt *r,*ic; 140 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 141 PetscInt *bi,*bj,*ajtmp; 142 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 143 PetscReal f; 144 PetscInt nlnk,*lnk,k,**bi_ptr; 145 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 146 PetscBT lnkbt; 147 PetscTruth newdatastruct=PETSC_FALSE; 148 149 PetscFunctionBegin; 150 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 151 if(newdatastruct){ 152 ierr = MatLUFactorSymbolic_SeqAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 157 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 158 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 159 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 160 161 /* get new row pointers */ 162 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 163 bi[0] = 0; 164 165 /* bdiag is location of diagonal in factor */ 166 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 167 bdiag[0] = 0; 168 169 /* linked list for storing column indices of the active row */ 170 nlnk = n + 1; 171 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 172 173 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 174 175 /* initial FreeSpace size is f*(ai[n]+1) */ 176 f = info->fill; 177 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 178 current_space = free_space; 179 180 for (i=0; i<n; i++) { 181 /* copy previous fill into linked list */ 182 nzi = 0; 183 nnz = ai[r[i]+1] - ai[r[i]]; 184 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 185 ajtmp = aj + ai[r[i]]; 186 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 187 nzi += nlnk; 188 189 /* add pivot rows into linked list */ 190 row = lnk[n]; 191 while (row < i) { 192 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 193 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 194 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 195 nzi += nlnk; 196 row = lnk[row]; 197 } 198 bi[i+1] = bi[i] + nzi; 199 im[i] = nzi; 200 201 /* mark bdiag */ 202 nzbd = 0; 203 nnz = nzi; 204 k = lnk[n]; 205 while (nnz-- && k < i){ 206 nzbd++; 207 k = lnk[k]; 208 } 209 bdiag[i] = bi[i] + nzbd; 210 211 /* if free space is not available, make more free space */ 212 if (current_space->local_remaining<nzi) { 213 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 214 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 215 reallocs++; 216 } 217 218 /* copy data into free space, then initialize lnk */ 219 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 220 bi_ptr[i] = current_space->array; 221 current_space->array += nzi; 222 current_space->local_used += nzi; 223 current_space->local_remaining -= nzi; 224 } 225 #if defined(PETSC_USE_INFO) 226 if (ai[n] != 0) { 227 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 228 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 229 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 230 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 231 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 232 } else { 233 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 234 } 235 #endif 236 237 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 238 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 239 240 /* destroy list of free space and other temporary array(s) */ 241 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 242 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 243 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 244 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 245 246 /* put together the new matrix */ 247 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 248 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 249 b = (Mat_SeqAIJ*)(B)->data; 250 b->free_a = PETSC_TRUE; 251 b->free_ij = PETSC_TRUE; 252 b->singlemalloc = PETSC_FALSE; 253 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 254 b->j = bj; 255 b->i = bi; 256 b->diag = bdiag; 257 b->ilen = 0; 258 b->imax = 0; 259 b->row = isrow; 260 b->col = iscol; 261 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 262 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 263 b->icol = isicol; 264 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 265 266 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 267 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 268 b->maxnz = b->nz = bi[n] ; 269 270 (B)->factor = MAT_FACTOR_LU; 271 (B)->info.factor_mallocs = reallocs; 272 (B)->info.fill_ratio_given = f; 273 274 if (ai[n]) { 275 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 276 } else { 277 (B)->info.fill_ratio_needed = 0.0; 278 } 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 280 (B)->ops->solve = MatSolve_SeqAIJ; 281 (B)->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 282 /* switch to inodes if appropriate */ 283 ierr = MatLUFactorSymbolic_Inode(B,A,isrow,iscol,info);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_newdatastruct" 289 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 290 { 291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 292 IS isicol; 293 PetscErrorCode ierr; 294 const PetscInt *r,*ic; 295 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 296 PetscInt *bi,*bj,*ajtmp; 297 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 298 PetscReal f; 299 PetscInt nlnk,*lnk,k,**bi_ptr; 300 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 301 PetscBT lnkbt; 302 303 PetscFunctionBegin; 304 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square"); 305 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 306 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 307 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 308 309 /* get new row pointers */ 310 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 311 bi[0] = 0; 312 313 /* bdiag is location of diagonal in factor */ 314 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 315 bdiag[0] = 0; 316 317 /* linked list for storing column indices of the active row */ 318 nlnk = n + 1; 319 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 320 321 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 322 323 /* initial FreeSpace size is f*(ai[n]+1) */ 324 f = info->fill; 325 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 326 current_space = free_space; 327 328 for (i=0; i<n; i++) { 329 /* copy previous fill into linked list */ 330 nzi = 0; 331 nnz = ai[r[i]+1] - ai[r[i]]; 332 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 333 ajtmp = aj + ai[r[i]]; 334 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 335 nzi += nlnk; 336 337 /* add pivot rows into linked list */ 338 row = lnk[n]; 339 while (row < i){ 340 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 341 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 342 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 343 nzi += nlnk; 344 row = lnk[row]; 345 } 346 bi[i+1] = bi[i] + nzi; 347 im[i] = nzi; 348 349 /* mark bdiag */ 350 nzbd = 0; 351 nnz = nzi; 352 k = lnk[n]; 353 while (nnz-- && k < i){ 354 nzbd++; 355 k = lnk[k]; 356 } 357 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_newdatastruct() */ 358 359 /* if free space is not available, make more free space */ 360 if (current_space->local_remaining<nzi) { 361 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 362 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 363 reallocs++; 364 } 365 366 /* copy data into free space, then initialize lnk */ 367 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 368 bi_ptr[i] = current_space->array; 369 current_space->array += nzi; 370 current_space->local_used += nzi; 371 current_space->local_remaining -= nzi; 372 } 373 #if defined(PETSC_USE_INFO) 374 if (ai[n] != 0) { 375 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 376 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 377 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 378 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 379 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 380 } else { 381 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 382 } 383 #endif 384 385 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 386 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 387 388 /* destroy list of free space and other temporary array(s) */ 389 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 390 ierr = PetscFreeSpaceContiguous_newdatastruct(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 391 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 392 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 393 394 /* put together the new matrix */ 395 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 396 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 397 b = (Mat_SeqAIJ*)(B)->data; 398 b->free_a = PETSC_TRUE; 399 b->free_ij = PETSC_TRUE; 400 b->singlemalloc = PETSC_FALSE; 401 ierr = PetscMalloc((bi[2*n+1])*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 402 b->j = bj; 403 b->i = bi; 404 b->diag = bdiag; 405 b->ilen = 0; 406 b->imax = 0; 407 b->row = isrow; 408 b->col = iscol; 409 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 410 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 411 b->icol = isicol; 412 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 413 414 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 415 ierr = PetscLogObjectMemory(B,bi[2*n+1]*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 416 b->maxnz = b->nz = bi[2*n+1] ; 417 418 (B)->factor = MAT_FACTOR_LU; 419 (B)->info.factor_mallocs = reallocs; 420 (B)->info.fill_ratio_given = f; 421 422 if (ai[n]) { 423 (B)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 424 } else { 425 (B)->info.fill_ratio_needed = 0.0; 426 } 427 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 428 PetscFunctionReturn(0); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatFactorDumpMatrix" 436 PetscErrorCode MatFactorDumpMatrix(Mat A) 437 { 438 PetscErrorCode ierr; 439 PetscTruth flg = PETSC_FALSE; 440 441 PetscFunctionBegin; 442 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 443 if (flg) { 444 PetscViewer viewer; 445 char filename[PETSC_MAX_PATH_LEN]; 446 447 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 448 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 449 ierr = MatView(A,viewer);CHKERRQ(ierr); 450 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 451 } 452 PetscFunctionReturn(0); 453 } 454 455 extern PetscErrorCode MatSolve_Inode(Mat,Vec,Vec); 456 457 /* ----------------------------------------------------------- */ 458 extern PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat,Vec,Vec); 459 extern PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat,Vec,Vec); 460 461 #undef __FUNCT__ 462 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_newdatastruct" 463 PetscErrorCode MatLUFactorNumeric_SeqAIJ_newdatastruct(Mat B,Mat A,const MatFactorInfo *info) 464 { 465 Mat C=B; 466 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 467 IS isrow = b->row,isicol = b->icol; 468 PetscErrorCode ierr; 469 const PetscInt *r,*ic,*ics; 470 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 471 PetscInt *ajtmp,*bjtmp,nz,nzL,row,*bdiag=b->diag,*pj; 472 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 473 PetscReal shift=info->shiftinblocks; 474 PetscTruth row_identity, col_identity; 475 476 PetscFunctionBegin; 477 /* printf("MatLUFactorNumeric_SeqAIJ_newdatastruct is called ...\n"); */ 478 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 479 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 480 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 481 ics = ic; 482 483 for (i=0; i<n; i++){ 484 /* zero rtmp */ 485 /* L part */ 486 nz = bi[i+1] - bi[i]; 487 bjtmp = bj + bi[i]; 488 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 489 490 /* U part */ 491 nz = bi[2*n-i+1] - bi[2*n-i]; 492 bjtmp = bj + bi[2*n-i]; 493 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 494 495 /* load in initial (unfactored row) */ 496 nz = ai[r[i]+1] - ai[r[i]]; 497 ajtmp = aj + ai[r[i]]; 498 v = aa + ai[r[i]]; 499 for (j=0; j<nz; j++) { 500 rtmp[ics[ajtmp[j]]] = v[j]; 501 } 502 if (rtmp[ics[r[i]]] == 0.0){ 503 rtmp[ics[r[i]]] += shift; /* shift the diagonal of the matrix */ 504 /* printf("row %d, shift %g\n",i,shift); */ 505 } 506 507 /* elimination */ 508 bjtmp = bj + bi[i]; 509 row = *bjtmp++; 510 nzL = bi[i+1] - bi[i]; 511 k = 0; 512 while (k < nzL) { 513 pc = rtmp + row; 514 if (*pc != 0.0) { 515 pv = b->a + bdiag[row]; 516 multiplier = *pc * (*pv); 517 *pc = multiplier; 518 pj = b->j + bi[2*n-row]; /* begining of U(row,:) */ 519 pv = b->a + bi[2*n-row]; 520 nz = bi[2*n-row+1] - bi[2*n-row] - 1; /* num of entries in U(row,:), excluding diag */ 521 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 522 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 523 } 524 row = *bjtmp++; k++; 525 } 526 527 /* finished row so stick it into b->a */ 528 /* L part */ 529 pv = b->a + bi[i] ; 530 pj = b->j + bi[i] ; 531 nz = bi[i+1] - bi[i]; 532 for (j=0; j<nz; j++) { 533 pv[j] = rtmp[pj[j]]; 534 } 535 536 /* Mark diagonal and invert diagonal for simplier triangular solves */ 537 pv = b->a + bdiag[i]; 538 pj = b->j + bdiag[i]; 539 /* if (*pj != i)SETERRQ2(PETSC_ERR_SUP,"row %d != *pj %d",i,*pj) */ 540 *pv = 1.0/rtmp[*pj]; 541 542 /* U part */ 543 pv = b->a + bi[2*n-i]; 544 pj = b->j + bi[2*n-i]; 545 nz = bi[2*n-i+1] - bi[2*n-i] - 1; 546 for (j=0; j<nz; j++) pv[j] = rtmp[pj[j]]; 547 } 548 ierr = PetscFree(rtmp);CHKERRQ(ierr); 549 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 550 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 551 552 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 553 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 554 if (row_identity && col_identity) { 555 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 556 } else { 557 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 558 } 559 560 C->ops->solveadd = 0; 561 C->ops->solvetranspose = 0; 562 C->ops->solvetransposeadd = 0; 563 C->ops->matsolve = 0; 564 C->assembled = PETSC_TRUE; 565 C->preallocated = PETSC_TRUE; 566 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 567 PetscFunctionReturn(0); 568 } 569 570 #undef __FUNCT__ 571 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 572 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 573 { 574 Mat C=B; 575 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 576 IS isrow = b->row,isicol = b->icol; 577 PetscErrorCode ierr; 578 const PetscInt *r,*ic,*ics; 579 PetscInt nz,row,i,j,n=A->rmap->n,diag; 580 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 581 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 582 MatScalar *pv,*rtmp,*pc,multiplier,d; 583 const MatScalar *v,*aa=a->a; 584 PetscReal rs=0.0; 585 LUShift_Ctx sctx; 586 PetscInt newshift,*ddiag; 587 588 PetscFunctionBegin; 589 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 590 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 591 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 592 ics = ic; 593 594 sctx.shift_top = 0; 595 sctx.nshift_max = 0; 596 sctx.shift_lo = 0; 597 sctx.shift_hi = 0; 598 sctx.shift_fraction = 0; 599 600 /* if both shift schemes are chosen by user, only use info->shiftpd */ 601 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 602 ddiag = a->diag; 603 sctx.shift_top = info->zeropivot; 604 for (i=0; i<n; i++) { 605 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 606 d = (aa)[ddiag[i]]; 607 rs = -PetscAbsScalar(d) - PetscRealPart(d); 608 v = aa+ai[i]; 609 nz = ai[i+1] - ai[i]; 610 for (j=0; j<nz; j++) 611 rs += PetscAbsScalar(v[j]); 612 if (rs>sctx.shift_top) sctx.shift_top = rs; 613 } 614 sctx.shift_top *= 1.1; 615 sctx.nshift_max = 5; 616 sctx.shift_lo = 0.; 617 sctx.shift_hi = 1.; 618 } 619 620 sctx.shift_amount = 0.0; 621 sctx.nshift = 0; 622 do { 623 sctx.lushift = PETSC_FALSE; 624 for (i=0; i<n; i++){ 625 nz = bi[i+1] - bi[i]; 626 bjtmp = bj + bi[i]; 627 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 628 629 /* load in initial (unfactored row) */ 630 nz = ai[r[i]+1] - ai[r[i]]; 631 ajtmp = aj + ai[r[i]]; 632 v = aa + ai[r[i]]; 633 for (j=0; j<nz; j++) { 634 rtmp[ics[ajtmp[j]]] = v[j]; 635 } 636 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 637 /* if (sctx.shift_amount > 0.0) printf("row %d, shift %g\n",i,sctx.shift_amount); */ 638 639 row = *bjtmp++; 640 while (row < i) { 641 pc = rtmp + row; 642 if (*pc != 0.0) { 643 pv = b->a + diag_offset[row]; 644 pj = b->j + diag_offset[row] + 1; 645 multiplier = *pc / *pv++; 646 *pc = multiplier; 647 nz = bi[row+1] - diag_offset[row] - 1; 648 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 649 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 650 } 651 row = *bjtmp++; 652 } 653 /* finished row so stick it into b->a */ 654 pv = b->a + bi[i] ; 655 pj = b->j + bi[i] ; 656 nz = bi[i+1] - bi[i]; 657 diag = diag_offset[i] - bi[i]; 658 rs = 0.0; 659 for (j=0; j<nz; j++) { 660 pv[j] = rtmp[pj[j]]; 661 rs += PetscAbsScalar(pv[j]); 662 } 663 rs -= PetscAbsScalar(pv[diag]); 664 665 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 666 sctx.rs = rs; 667 sctx.pv = pv[diag]; 668 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 669 if (newshift == 1) break; 670 } 671 672 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 673 /* 674 * if no shift in this attempt & shifting & started shifting & can refine, 675 * then try lower shift 676 */ 677 sctx.shift_hi = sctx.shift_fraction; 678 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 679 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 680 sctx.lushift = PETSC_TRUE; 681 sctx.nshift++; 682 } 683 } while (sctx.lushift); 684 685 /* invert diagonal entries for simplier triangular solves */ 686 for (i=0; i<n; i++) { 687 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 688 } 689 ierr = PetscFree(rtmp);CHKERRQ(ierr); 690 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 691 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 692 if (b->inode.use) { 693 C->ops->solve = MatSolve_Inode; 694 } else { 695 PetscTruth row_identity, col_identity; 696 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 697 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 698 if (row_identity && col_identity) { 699 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 700 } else { 701 C->ops->solve = MatSolve_SeqAIJ; 702 } 703 } 704 C->ops->solveadd = MatSolveAdd_SeqAIJ; 705 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 706 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 707 C->ops->matsolve = MatMatSolve_SeqAIJ; 708 C->assembled = PETSC_TRUE; 709 C->preallocated = PETSC_TRUE; 710 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 711 if (sctx.nshift){ 712 if (info->shiftpd) { 713 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 714 } else if (info->shiftnz) { 715 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 716 } 717 } 718 PetscFunctionReturn(0); 719 } 720 721 /* 722 This routine implements inplace ILU(0) with row or/and column permutations. 723 Input: 724 A - original matrix 725 Output; 726 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 727 a->j (col index) is permuted by the inverse of colperm, then sorted 728 a->a reordered accordingly with a->j 729 a->diag (ptr to diagonal elements) is updated. 730 */ 731 #undef __FUNCT__ 732 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 733 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 734 { 735 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 736 IS isrow = a->row,isicol = a->icol; 737 PetscErrorCode ierr; 738 const PetscInt *r,*ic,*ics; 739 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 740 PetscInt *ajtmp,nz,row; 741 PetscInt *diag = a->diag,nbdiag,*pj; 742 PetscScalar *rtmp,*pc,multiplier,d; 743 MatScalar *v,*pv; 744 PetscReal rs; 745 LUShift_Ctx sctx; 746 PetscInt newshift; 747 748 PetscFunctionBegin; 749 if (A != B) SETERRQ(PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 750 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 751 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 752 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 753 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 754 ics = ic; 755 756 sctx.shift_top = 0; 757 sctx.nshift_max = 0; 758 sctx.shift_lo = 0; 759 sctx.shift_hi = 0; 760 sctx.shift_fraction = 0; 761 762 /* if both shift schemes are chosen by user, only use info->shiftpd */ 763 if (info->shiftpd) { /* set sctx.shift_top=max{rs} */ 764 sctx.shift_top = 0; 765 for (i=0; i<n; i++) { 766 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 767 d = (a->a)[diag[i]]; 768 rs = -PetscAbsScalar(d) - PetscRealPart(d); 769 v = a->a+ai[i]; 770 nz = ai[i+1] - ai[i]; 771 for (j=0; j<nz; j++) 772 rs += PetscAbsScalar(v[j]); 773 if (rs>sctx.shift_top) sctx.shift_top = rs; 774 } 775 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 776 sctx.shift_top *= 1.1; 777 sctx.nshift_max = 5; 778 sctx.shift_lo = 0.; 779 sctx.shift_hi = 1.; 780 } 781 782 sctx.shift_amount = 0; 783 sctx.nshift = 0; 784 do { 785 sctx.lushift = PETSC_FALSE; 786 for (i=0; i<n; i++){ 787 /* load in initial unfactored row */ 788 nz = ai[r[i]+1] - ai[r[i]]; 789 ajtmp = aj + ai[r[i]]; 790 v = a->a + ai[r[i]]; 791 /* sort permuted ajtmp and values v accordingly */ 792 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 793 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 794 795 diag[r[i]] = ai[r[i]]; 796 for (j=0; j<nz; j++) { 797 rtmp[ajtmp[j]] = v[j]; 798 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 799 } 800 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 801 802 row = *ajtmp++; 803 while (row < i) { 804 pc = rtmp + row; 805 if (*pc != 0.0) { 806 pv = a->a + diag[r[row]]; 807 pj = aj + diag[r[row]] + 1; 808 809 multiplier = *pc / *pv++; 810 *pc = multiplier; 811 nz = ai[r[row]+1] - diag[r[row]] - 1; 812 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 813 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 814 } 815 row = *ajtmp++; 816 } 817 /* finished row so overwrite it onto a->a */ 818 pv = a->a + ai[r[i]] ; 819 pj = aj + ai[r[i]] ; 820 nz = ai[r[i]+1] - ai[r[i]]; 821 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 822 823 rs = 0.0; 824 for (j=0; j<nz; j++) { 825 pv[j] = rtmp[pj[j]]; 826 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 827 } 828 829 /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */ 830 sctx.rs = rs; 831 sctx.pv = pv[nbdiag]; 832 ierr = MatLUCheckShift_inline(info,sctx,i,newshift);CHKERRQ(ierr); 833 if (newshift == 1) break; 834 } 835 836 if (info->shiftpd && !sctx.lushift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 837 /* 838 * if no shift in this attempt & shifting & started shifting & can refine, 839 * then try lower shift 840 */ 841 sctx.shift_hi = sctx.shift_fraction; 842 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 843 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 844 sctx.lushift = PETSC_TRUE; 845 sctx.nshift++; 846 } 847 } while (sctx.lushift); 848 849 /* invert diagonal entries for simplier triangular solves */ 850 for (i=0; i<n; i++) { 851 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 852 } 853 854 ierr = PetscFree(rtmp);CHKERRQ(ierr); 855 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 856 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 857 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 858 A->ops->solveadd = MatSolveAdd_SeqAIJ; 859 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 860 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 861 A->assembled = PETSC_TRUE; 862 A->preallocated = PETSC_TRUE; 863 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 864 if (sctx.nshift){ 865 if (info->shiftpd) { 866 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 867 } else if (info->shiftnz) { 868 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 869 } 870 } 871 PetscFunctionReturn(0); 872 } 873 874 /* ----------------------------------------------------------- */ 875 #undef __FUNCT__ 876 #define __FUNCT__ "MatLUFactor_SeqAIJ" 877 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 878 { 879 PetscErrorCode ierr; 880 Mat C; 881 882 PetscFunctionBegin; 883 ierr = MatGetFactor(A,MAT_SOLVER_PETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 884 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 885 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 886 A->ops->solve = C->ops->solve; 887 A->ops->solvetranspose = C->ops->solvetranspose; 888 ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 889 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 /* ----------------------------------------------------------- */ 893 894 895 #undef __FUNCT__ 896 #define __FUNCT__ "MatSolve_SeqAIJ" 897 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 898 { 899 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 900 IS iscol = a->col,isrow = a->row; 901 PetscErrorCode ierr; 902 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 903 PetscInt nz; 904 const PetscInt *rout,*cout,*r,*c; 905 PetscScalar *x,*tmp,*tmps,sum; 906 const PetscScalar *b; 907 const MatScalar *aa = a->a,*v; 908 909 PetscFunctionBegin; 910 if (!n) PetscFunctionReturn(0); 911 912 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 913 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 914 tmp = a->solve_work; 915 916 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 917 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 918 919 /* forward solve the lower triangular */ 920 tmp[0] = b[*r++]; 921 tmps = tmp; 922 for (i=1; i<n; i++) { 923 v = aa + ai[i] ; 924 vi = aj + ai[i] ; 925 nz = a->diag[i] - ai[i]; 926 sum = b[*r++]; 927 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 928 tmp[i] = sum; 929 } 930 931 /* backward solve the upper triangular */ 932 for (i=n-1; i>=0; i--){ 933 v = aa + a->diag[i] + 1; 934 vi = aj + a->diag[i] + 1; 935 nz = ai[i+1] - a->diag[i] - 1; 936 sum = tmp[i]; 937 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 938 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 939 } 940 941 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 942 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 943 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 944 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 945 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMatSolve_SeqAIJ" 951 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 952 { 953 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 954 IS iscol = a->col,isrow = a->row; 955 PetscErrorCode ierr; 956 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 957 PetscInt nz,neq; 958 const PetscInt *rout,*cout,*r,*c; 959 PetscScalar *x,*b,*tmp,*tmps,sum; 960 const MatScalar *aa = a->a,*v; 961 PetscTruth bisdense,xisdense; 962 963 PetscFunctionBegin; 964 if (!n) PetscFunctionReturn(0); 965 966 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 967 if (!bisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 968 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 969 if (!xisdense) SETERRQ(PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 970 971 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 972 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 973 974 tmp = a->solve_work; 975 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 976 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 977 978 for (neq=0; neq<B->cmap->n; neq++){ 979 /* forward solve the lower triangular */ 980 tmp[0] = b[r[0]]; 981 tmps = tmp; 982 for (i=1; i<n; i++) { 983 v = aa + ai[i] ; 984 vi = aj + ai[i] ; 985 nz = a->diag[i] - ai[i]; 986 sum = b[r[i]]; 987 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 988 tmp[i] = sum; 989 } 990 /* backward solve the upper triangular */ 991 for (i=n-1; i>=0; i--){ 992 v = aa + a->diag[i] + 1; 993 vi = aj + a->diag[i] + 1; 994 nz = ai[i+1] - a->diag[i] - 1; 995 sum = tmp[i]; 996 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 997 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 998 } 999 1000 b += n; 1001 x += n; 1002 } 1003 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1004 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1005 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1006 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1007 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1008 PetscFunctionReturn(0); 1009 } 1010 1011 #undef __FUNCT__ 1012 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1013 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1014 { 1015 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1016 IS iscol = a->col,isrow = a->row; 1017 PetscErrorCode ierr; 1018 const PetscInt *r,*c,*rout,*cout; 1019 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1020 PetscInt nz,row; 1021 PetscScalar *x,*b,*tmp,*tmps,sum; 1022 const MatScalar *aa = a->a,*v; 1023 1024 PetscFunctionBegin; 1025 if (!n) PetscFunctionReturn(0); 1026 1027 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1028 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1029 tmp = a->solve_work; 1030 1031 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1032 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1033 1034 /* forward solve the lower triangular */ 1035 tmp[0] = b[*r++]; 1036 tmps = tmp; 1037 for (row=1; row<n; row++) { 1038 i = rout[row]; /* permuted row */ 1039 v = aa + ai[i] ; 1040 vi = aj + ai[i] ; 1041 nz = a->diag[i] - ai[i]; 1042 sum = b[*r++]; 1043 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1044 tmp[row] = sum; 1045 } 1046 1047 /* backward solve the upper triangular */ 1048 for (row=n-1; row>=0; row--){ 1049 i = rout[row]; /* permuted row */ 1050 v = aa + a->diag[i] + 1; 1051 vi = aj + a->diag[i] + 1; 1052 nz = ai[i+1] - a->diag[i] - 1; 1053 sum = tmp[row]; 1054 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1055 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1056 } 1057 1058 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1059 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1060 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1061 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1062 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1063 PetscFunctionReturn(0); 1064 } 1065 1066 /* ----------------------------------------------------------- */ 1067 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h" 1068 #undef __FUNCT__ 1069 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 1070 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 1071 { 1072 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1073 PetscErrorCode ierr; 1074 PetscInt n = A->rmap->n; 1075 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1076 PetscScalar *x; 1077 const PetscScalar *b; 1078 const MatScalar *aa = a->a; 1079 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1080 PetscInt adiag_i,i,nz,ai_i; 1081 const PetscInt *vi; 1082 const MatScalar *v; 1083 PetscScalar sum; 1084 #endif 1085 1086 PetscFunctionBegin; 1087 if (!n) PetscFunctionReturn(0); 1088 1089 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1090 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1091 1092 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1093 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1094 #else 1095 /* forward solve the lower triangular */ 1096 x[0] = b[0]; 1097 for (i=1; i<n; i++) { 1098 ai_i = ai[i]; 1099 v = aa + ai_i; 1100 vi = aj + ai_i; 1101 nz = adiag[i] - ai_i; 1102 sum = b[i]; 1103 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1104 x[i] = sum; 1105 } 1106 1107 /* backward solve the upper triangular */ 1108 for (i=n-1; i>=0; i--){ 1109 adiag_i = adiag[i]; 1110 v = aa + adiag_i + 1; 1111 vi = aj + adiag_i + 1; 1112 nz = ai[i+1] - adiag_i - 1; 1113 sum = x[i]; 1114 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1115 x[i] = sum*aa[adiag_i]; 1116 } 1117 #endif 1118 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1119 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 1120 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1121 PetscFunctionReturn(0); 1122 } 1123 1124 #undef __FUNCT__ 1125 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1126 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1127 { 1128 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1129 IS iscol = a->col,isrow = a->row; 1130 PetscErrorCode ierr; 1131 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1132 PetscInt nz; 1133 const PetscInt *rout,*cout,*r,*c; 1134 PetscScalar *x,*b,*tmp,sum; 1135 const MatScalar *aa = a->a,*v; 1136 1137 PetscFunctionBegin; 1138 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1139 1140 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1141 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1142 tmp = a->solve_work; 1143 1144 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1145 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1146 1147 /* forward solve the lower triangular */ 1148 tmp[0] = b[*r++]; 1149 for (i=1; i<n; i++) { 1150 v = aa + ai[i] ; 1151 vi = aj + ai[i] ; 1152 nz = a->diag[i] - ai[i]; 1153 sum = b[*r++]; 1154 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1155 tmp[i] = sum; 1156 } 1157 1158 /* backward solve the upper triangular */ 1159 for (i=n-1; i>=0; i--){ 1160 v = aa + a->diag[i] + 1; 1161 vi = aj + a->diag[i] + 1; 1162 nz = ai[i+1] - a->diag[i] - 1; 1163 sum = tmp[i]; 1164 while (nz--) sum -= *v++ * tmp[*vi++ ]; 1165 tmp[i] = sum*aa[a->diag[i]]; 1166 x[*c--] += tmp[i]; 1167 } 1168 1169 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1170 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1171 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1173 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1174 1175 PetscFunctionReturn(0); 1176 } 1177 /* -------------------------------------------------------------------*/ 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1180 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1181 { 1182 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1183 IS iscol = a->col,isrow = a->row; 1184 PetscErrorCode ierr; 1185 const PetscInt *rout,*cout,*r,*c; 1186 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1187 PetscInt nz,*diag = a->diag; 1188 PetscScalar *x,*b,*tmp,s1; 1189 const MatScalar *aa = a->a,*v; 1190 1191 PetscFunctionBegin; 1192 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1193 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1194 tmp = a->solve_work; 1195 1196 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1197 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1198 1199 /* copy the b into temp work space according to permutation */ 1200 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1201 1202 /* forward solve the U^T */ 1203 for (i=0; i<n; i++) { 1204 v = aa + diag[i] ; 1205 vi = aj + diag[i] + 1; 1206 nz = ai[i+1] - diag[i] - 1; 1207 s1 = tmp[i]; 1208 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1209 while (nz--) { 1210 tmp[*vi++ ] -= (*v++)*s1; 1211 } 1212 tmp[i] = s1; 1213 } 1214 1215 /* backward solve the L^T */ 1216 for (i=n-1; i>=0; i--){ 1217 v = aa + diag[i] - 1 ; 1218 vi = aj + diag[i] - 1 ; 1219 nz = diag[i] - ai[i]; 1220 s1 = tmp[i]; 1221 while (nz--) { 1222 tmp[*vi-- ] -= (*v--)*s1; 1223 } 1224 } 1225 1226 /* copy tmp into x according to permutation */ 1227 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1228 1229 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1230 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1231 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1232 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1233 1234 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1235 PetscFunctionReturn(0); 1236 } 1237 1238 #undef __FUNCT__ 1239 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1240 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1241 { 1242 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1243 IS iscol = a->col,isrow = a->row; 1244 PetscErrorCode ierr; 1245 const PetscInt *r,*c,*rout,*cout; 1246 PetscInt i,n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1247 PetscInt nz,*diag = a->diag; 1248 PetscScalar *x,*b,*tmp; 1249 const MatScalar *aa = a->a,*v; 1250 1251 PetscFunctionBegin; 1252 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1253 1254 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1255 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1256 tmp = a->solve_work; 1257 1258 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1259 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1260 1261 /* copy the b into temp work space according to permutation */ 1262 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1263 1264 /* forward solve the U^T */ 1265 for (i=0; i<n; i++) { 1266 v = aa + diag[i] ; 1267 vi = aj + diag[i] + 1; 1268 nz = ai[i+1] - diag[i] - 1; 1269 tmp[i] *= *v++; 1270 while (nz--) { 1271 tmp[*vi++ ] -= (*v++)*tmp[i]; 1272 } 1273 } 1274 1275 /* backward solve the L^T */ 1276 for (i=n-1; i>=0; i--){ 1277 v = aa + diag[i] - 1 ; 1278 vi = aj + diag[i] - 1 ; 1279 nz = diag[i] - ai[i]; 1280 while (nz--) { 1281 tmp[*vi-- ] -= (*v--)*tmp[i]; 1282 } 1283 } 1284 1285 /* copy tmp into x according to permutation */ 1286 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1287 1288 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1289 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1290 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1291 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1292 1293 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1294 PetscFunctionReturn(0); 1295 } 1296 /* ----------------------------------------------------------------*/ 1297 EXTERN PetscErrorCode Mat_CheckInode(Mat,PetscTruth); 1298 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth); 1299 1300 /* 1301 ilu(0) with natural ordering under new data structure. 1302 Factored arrays bj and ba are stored as 1303 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1304 1305 bi=fact->i is an array of size 2n+2, in which 1306 bi+ 1307 bi[i] -> 1st entry of L(i,:),i=0,...,i-1 1308 bi[n] -> points to L(n-1,:)+1 1309 bi[n+1] -> 1st entry of U(n-1,:) 1310 bi[2n-i] -> 1st entry of U(i,:) 1311 bi[2n-i+1] -> end of U(i,:)+1, the 1st entry of U(i-1,:) 1312 bi[2n] -> 1st entry of U(0,:) 1313 bi[2n+1] -> points to U(0,:)+1 1314 1315 U(i,:) contains diag[i] as its last entry, i.e., 1316 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1317 */ 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct" 1320 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1321 { 1322 1323 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1324 PetscErrorCode ierr; 1325 PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1326 PetscInt i,j,nz,*bi,*bj,*bdiag; 1327 1328 PetscFunctionBegin; 1329 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1330 b = (Mat_SeqAIJ*)(fact)->data; 1331 1332 /* allocate matrix arrays for new data structure */ 1333 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,2*n+2,PetscInt,&b->i);CHKERRQ(ierr); 1334 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(2*n+2)*sizeof(PetscInt));CHKERRQ(ierr); 1335 b->singlemalloc = PETSC_TRUE; 1336 if (!b->diag){ 1337 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1338 } 1339 bdiag = b->diag; 1340 1341 if (n > 0) { 1342 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1343 } 1344 1345 /* set bi and bj with new data structure */ 1346 bi = b->i; 1347 bj = b->j; 1348 1349 /* L part */ 1350 bi[0] = 0; 1351 for (i=0; i<n; i++){ 1352 nz = adiag[i] - ai[i]; 1353 bi[i+1] = bi[i] + nz; 1354 aj = a->j + ai[i]; 1355 for (j=0; j<nz; j++){ 1356 *bj = aj[j]; bj++; 1357 } 1358 } 1359 1360 /* U part */ 1361 bi[n+1] = bi[n]; 1362 for (i=n-1; i>=0; i--){ 1363 nz = ai[i+1] - adiag[i] - 1; 1364 bi[2*n-i+1] = bi[2*n-i] + nz + 1; 1365 aj = a->j + adiag[i] + 1; 1366 for (j=0; j<nz; j++){ 1367 *bj = aj[j]; bj++; 1368 } 1369 /* diag[i] */ 1370 *bj = i; bj++; 1371 bdiag[i] = bi[2*n-i+1]-1; 1372 } 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #undef __FUNCT__ 1377 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_newdatastruct" 1378 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_newdatastruct(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1379 { 1380 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1381 IS isicol; 1382 PetscErrorCode ierr; 1383 const PetscInt *r,*ic; 1384 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1385 PetscInt *bi,*cols,nnz,*cols_lvl; 1386 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1387 PetscInt i,levels,diagonal_fill; 1388 PetscTruth col_identity,row_identity; 1389 PetscReal f; 1390 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1391 PetscBT lnkbt; 1392 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1393 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1394 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1395 PetscTruth missing; 1396 1397 PetscFunctionBegin; 1398 //printf("MatILUFactorSymbolic_SeqAIJ_newdatastruct ...\n"); 1399 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1400 f = info->fill; 1401 levels = (PetscInt)info->levels; 1402 diagonal_fill = (PetscInt)info->diagonal_fill; 1403 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1404 1405 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1406 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1407 1408 if (!levels && row_identity && col_identity) { 1409 /* special case: ilu(0) with natural ordering */ 1410 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1411 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1412 1413 fact->factor = MAT_FACTOR_ILU; 1414 (fact)->info.factor_mallocs = 0; 1415 (fact)->info.fill_ratio_given = info->fill; 1416 (fact)->info.fill_ratio_needed = 1.0; 1417 b = (Mat_SeqAIJ*)(fact)->data; 1418 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1419 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1420 b->row = isrow; 1421 b->col = iscol; 1422 b->icol = isicol; 1423 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1424 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1425 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1426 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1427 PetscFunctionReturn(0); 1428 } 1429 1430 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1431 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1432 1433 /* get new row pointers */ 1434 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1435 bi[0] = 0; 1436 /* bdiag is location of diagonal in factor */ 1437 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1438 bdiag[0] = 0; 1439 1440 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1441 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1442 1443 /* create a linked list for storing column indices of the active row */ 1444 nlnk = n + 1; 1445 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1446 1447 /* initial FreeSpace size is f*(ai[n]+1) */ 1448 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1449 current_space = free_space; 1450 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1451 current_space_lvl = free_space_lvl; 1452 1453 for (i=0; i<n; i++) { 1454 nzi = 0; 1455 /* copy current row into linked list */ 1456 nnz = ai[r[i]+1] - ai[r[i]]; 1457 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1458 cols = aj + ai[r[i]]; 1459 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1460 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1461 nzi += nlnk; 1462 1463 /* make sure diagonal entry is included */ 1464 if (diagonal_fill && lnk[i] == -1) { 1465 fm = n; 1466 while (lnk[fm] < i) fm = lnk[fm]; 1467 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1468 lnk[fm] = i; 1469 lnk_lvl[i] = 0; 1470 nzi++; dcount++; 1471 } 1472 1473 /* add pivot rows into the active row */ 1474 nzbd = 0; 1475 prow = lnk[n]; 1476 while (prow < i) { 1477 nnz = bdiag[prow]; 1478 cols = bj_ptr[prow] + nnz + 1; 1479 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1480 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1481 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1482 nzi += nlnk; 1483 prow = lnk[prow]; 1484 nzbd++; 1485 } 1486 bdiag[i] = nzbd; 1487 bi[i+1] = bi[i] + nzi; 1488 1489 /* if free space is not available, make more free space */ 1490 if (current_space->local_remaining<nzi) { 1491 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1492 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1493 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1494 reallocs++; 1495 } 1496 1497 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1498 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1499 bj_ptr[i] = current_space->array; 1500 bjlvl_ptr[i] = current_space_lvl->array; 1501 1502 /* make sure the active row i has diagonal entry */ 1503 if (*(bj_ptr[i]+bdiag[i]) != i) { 1504 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1505 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1506 } 1507 1508 current_space->array += nzi; 1509 current_space->local_used += nzi; 1510 current_space->local_remaining -= nzi; 1511 current_space_lvl->array += nzi; 1512 current_space_lvl->local_used += nzi; 1513 current_space_lvl->local_remaining -= nzi; 1514 } 1515 1516 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1517 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1518 1519 /* destroy list of free space and other temporary arrays */ 1520 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1521 1522 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1523 ierr = PetscFreeSpaceContiguous_newdatastruct(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1524 1525 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1526 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1527 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1528 1529 #if defined(PETSC_USE_INFO) 1530 { 1531 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1532 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1533 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1534 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1535 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1536 if (diagonal_fill) { 1537 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1538 } 1539 } 1540 #endif 1541 1542 /* put together the new matrix */ 1543 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1544 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1545 b = (Mat_SeqAIJ*)(fact)->data; 1546 b->free_a = PETSC_TRUE; 1547 b->free_ij = PETSC_TRUE; 1548 b->singlemalloc = PETSC_FALSE; 1549 ierr = PetscMalloc( (bi[2*n+1] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1550 b->j = bj; 1551 b->i = bi; 1552 b->diag = bdiag; 1553 b->ilen = 0; 1554 b->imax = 0; 1555 b->row = isrow; 1556 b->col = iscol; 1557 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1558 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1559 b->icol = isicol; 1560 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1561 /* In b structure: Free imax, ilen, old a, old j. 1562 Allocate bdiag, solve_work, new a, new j */ 1563 ierr = PetscLogObjectMemory(fact,bi[2*n+1] * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1564 b->maxnz = b->nz = bi[2*n+1] ; 1565 (fact)->info.factor_mallocs = reallocs; 1566 (fact)->info.fill_ratio_given = f; 1567 (fact)->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]); 1568 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_newdatastruct; 1569 /* ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); */ 1570 PetscFunctionReturn(0); 1571 } 1572 1573 #undef __FUNCT__ 1574 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1575 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1576 { 1577 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1578 IS isicol; 1579 PetscErrorCode ierr; 1580 const PetscInt *r,*ic; 1581 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1582 PetscInt *bi,*cols,nnz,*cols_lvl; 1583 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1584 PetscInt i,levels,diagonal_fill; 1585 PetscTruth col_identity,row_identity; 1586 PetscReal f; 1587 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1588 PetscBT lnkbt; 1589 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1590 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1591 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1592 PetscTruth missing; 1593 PetscTruth newdatastruct=PETSC_FALSE; 1594 1595 PetscFunctionBegin; 1596 ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr); 1597 if (newdatastruct){ 1598 ierr = MatILUFactorSymbolic_SeqAIJ_newdatastruct(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1603 f = info->fill; 1604 levels = (PetscInt)info->levels; 1605 diagonal_fill = (PetscInt)info->diagonal_fill; 1606 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1607 1608 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1609 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1610 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1611 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1612 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1613 1614 fact->factor = MAT_FACTOR_ILU; 1615 (fact)->info.factor_mallocs = 0; 1616 (fact)->info.fill_ratio_given = info->fill; 1617 (fact)->info.fill_ratio_needed = 1.0; 1618 b = (Mat_SeqAIJ*)(fact)->data; 1619 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1620 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1621 b->row = isrow; 1622 b->col = iscol; 1623 b->icol = isicol; 1624 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1625 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1626 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1627 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1632 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1633 1634 /* get new row pointers */ 1635 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1636 bi[0] = 0; 1637 /* bdiag is location of diagonal in factor */ 1638 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1639 bdiag[0] = 0; 1640 1641 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);CHKERRQ(ierr); 1642 bjlvl_ptr = (PetscInt**)(bj_ptr + n); 1643 1644 /* create a linked list for storing column indices of the active row */ 1645 nlnk = n + 1; 1646 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1647 1648 /* initial FreeSpace size is f*(ai[n]+1) */ 1649 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1650 current_space = free_space; 1651 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1652 current_space_lvl = free_space_lvl; 1653 1654 for (i=0; i<n; i++) { 1655 nzi = 0; 1656 /* copy current row into linked list */ 1657 nnz = ai[r[i]+1] - ai[r[i]]; 1658 if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1659 cols = aj + ai[r[i]]; 1660 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1661 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1662 nzi += nlnk; 1663 1664 /* make sure diagonal entry is included */ 1665 if (diagonal_fill && lnk[i] == -1) { 1666 fm = n; 1667 while (lnk[fm] < i) fm = lnk[fm]; 1668 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1669 lnk[fm] = i; 1670 lnk_lvl[i] = 0; 1671 nzi++; dcount++; 1672 } 1673 1674 /* add pivot rows into the active row */ 1675 nzbd = 0; 1676 prow = lnk[n]; 1677 while (prow < i) { 1678 nnz = bdiag[prow]; 1679 cols = bj_ptr[prow] + nnz + 1; 1680 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1681 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1682 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1683 nzi += nlnk; 1684 prow = lnk[prow]; 1685 nzbd++; 1686 } 1687 bdiag[i] = nzbd; 1688 bi[i+1] = bi[i] + nzi; 1689 1690 /* if free space is not available, make more free space */ 1691 if (current_space->local_remaining<nzi) { 1692 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1693 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1694 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1695 reallocs++; 1696 } 1697 1698 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1699 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1700 bj_ptr[i] = current_space->array; 1701 bjlvl_ptr[i] = current_space_lvl->array; 1702 1703 /* make sure the active row i has diagonal entry */ 1704 if (*(bj_ptr[i]+bdiag[i]) != i) { 1705 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\ 1706 try running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1707 } 1708 1709 current_space->array += nzi; 1710 current_space->local_used += nzi; 1711 current_space->local_remaining -= nzi; 1712 current_space_lvl->array += nzi; 1713 current_space_lvl->local_used += nzi; 1714 current_space_lvl->local_remaining -= nzi; 1715 } 1716 1717 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1718 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1719 1720 /* destroy list of free space and other temporary arrays */ 1721 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1722 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 1723 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1724 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1725 ierr = PetscFree(bj_ptr);CHKERRQ(ierr); 1726 1727 #if defined(PETSC_USE_INFO) 1728 { 1729 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1730 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1731 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1732 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1733 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1734 if (diagonal_fill) { 1735 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1736 } 1737 } 1738 #endif 1739 1740 /* put together the new matrix */ 1741 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1742 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1743 b = (Mat_SeqAIJ*)(fact)->data; 1744 b->free_a = PETSC_TRUE; 1745 b->free_ij = PETSC_TRUE; 1746 b->singlemalloc = PETSC_FALSE; 1747 ierr = PetscMalloc( (bi[n] )*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1748 b->j = bj; 1749 b->i = bi; 1750 for (i=0; i<n; i++) bdiag[i] += bi[i]; 1751 b->diag = bdiag; 1752 b->ilen = 0; 1753 b->imax = 0; 1754 b->row = isrow; 1755 b->col = iscol; 1756 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1757 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1758 b->icol = isicol; 1759 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1760 /* In b structure: Free imax, ilen, old a, old j. 1761 Allocate bdiag, solve_work, new a, new j */ 1762 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1763 b->maxnz = b->nz = bi[n] ; 1764 (fact)->info.factor_mallocs = reallocs; 1765 (fact)->info.fill_ratio_given = f; 1766 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 1767 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1768 ierr = MatILUFactorSymbolic_Inode(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1769 PetscFunctionReturn(0); 1770 } 1771 1772 #include "../src/mat/impls/sbaij/seq/sbaij.h" 1773 #undef __FUNCT__ 1774 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 1775 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 1776 { 1777 Mat C = B; 1778 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 1779 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 1780 IS ip=b->row,iip = b->icol; 1781 PetscErrorCode ierr; 1782 const PetscInt *rip,*riip; 1783 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol; 1784 PetscInt *ai=a->i,*aj=a->j; 1785 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 1786 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 1787 PetscReal zeropivot,rs,shiftnz; 1788 PetscReal shiftpd; 1789 ChShift_Ctx sctx; 1790 PetscInt newshift; 1791 PetscTruth perm_identity; 1792 1793 PetscFunctionBegin; 1794 1795 shiftnz = info->shiftnz; 1796 shiftpd = info->shiftpd; 1797 zeropivot = info->zeropivot; 1798 1799 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 1800 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 1801 1802 /* initialization */ 1803 nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar); 1804 ierr = PetscMalloc(nz,&il);CHKERRQ(ierr); 1805 jl = il + mbs; 1806 rtmp = (MatScalar*)(jl + mbs); 1807 1808 sctx.shift_amount = 0; 1809 sctx.nshift = 0; 1810 do { 1811 sctx.chshift = PETSC_FALSE; 1812 for (i=0; i<mbs; i++) { 1813 rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0; 1814 } 1815 1816 for (k = 0; k<mbs; k++){ 1817 bval = ba + bi[k]; 1818 /* initialize k-th row by the perm[k]-th row of A */ 1819 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 1820 for (j = jmin; j < jmax; j++){ 1821 col = riip[aj[j]]; 1822 if (col >= k){ /* only take upper triangular entry */ 1823 rtmp[col] = aa[j]; 1824 *bval++ = 0.0; /* for in-place factorization */ 1825 } 1826 } 1827 /* shift the diagonal of the matrix */ 1828 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 1829 1830 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 1831 dk = rtmp[k]; 1832 i = jl[k]; /* first row to be added to k_th row */ 1833 1834 while (i < k){ 1835 nexti = jl[i]; /* next row to be added to k_th row */ 1836 1837 /* compute multiplier, update diag(k) and U(i,k) */ 1838 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 1839 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 1840 dk += uikdi*ba[ili]; 1841 ba[ili] = uikdi; /* -U(i,k) */ 1842 1843 /* add multiple of row i to k-th row */ 1844 jmin = ili + 1; jmax = bi[i+1]; 1845 if (jmin < jmax){ 1846 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 1847 /* update il and jl for row i */ 1848 il[i] = jmin; 1849 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 1850 } 1851 i = nexti; 1852 } 1853 1854 /* shift the diagonals when zero pivot is detected */ 1855 /* compute rs=sum of abs(off-diagonal) */ 1856 rs = 0.0; 1857 jmin = bi[k]+1; 1858 nz = bi[k+1] - jmin; 1859 bcol = bj + jmin; 1860 while (nz--){ 1861 rs += PetscAbsScalar(rtmp[*bcol]); 1862 bcol++; 1863 } 1864 1865 sctx.rs = rs; 1866 sctx.pv = dk; 1867 ierr = MatCholeskyCheckShift_inline(info,sctx,k,newshift);CHKERRQ(ierr); 1868 1869 if (newshift == 1) { 1870 if (!sctx.shift_amount) { 1871 sctx.shift_amount = 1e-5; 1872 } 1873 break; 1874 } 1875 1876 /* copy data into U(k,:) */ 1877 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 1878 jmin = bi[k]+1; jmax = bi[k+1]; 1879 if (jmin < jmax) { 1880 for (j=jmin; j<jmax; j++){ 1881 col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0; 1882 } 1883 /* add the k-th row into il and jl */ 1884 il[k] = jmin; 1885 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 1886 } 1887 } 1888 } while (sctx.chshift); 1889 ierr = PetscFree(il);CHKERRQ(ierr); 1890 1891 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 1892 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 1893 1894 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 1895 if (perm_identity){ 1896 (B)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1897 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 1898 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 1899 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 1900 } else { 1901 (B)->ops->solve = MatSolve_SeqSBAIJ_1; 1902 (B)->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 1903 (B)->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 1904 (B)->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 1905 } 1906 1907 C->assembled = PETSC_TRUE; 1908 C->preallocated = PETSC_TRUE; 1909 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 1910 if (sctx.nshift){ 1911 if (shiftnz) { 1912 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1913 } else if (shiftpd) { 1914 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 1915 } 1916 } 1917 PetscFunctionReturn(0); 1918 } 1919 1920 #undef __FUNCT__ 1921 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 1922 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 1923 { 1924 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1925 Mat_SeqSBAIJ *b; 1926 PetscErrorCode ierr; 1927 PetscTruth perm_identity,missing; 1928 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui; 1929 const PetscInt *rip,*riip; 1930 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 1931 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 1932 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 1933 PetscReal fill=info->fill,levels=info->levels; 1934 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1935 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1936 PetscBT lnkbt; 1937 IS iperm; 1938 1939 PetscFunctionBegin; 1940 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1941 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1942 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1943 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 1944 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 1945 1946 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 1947 ui[0] = 0; 1948 1949 /* ICC(0) without matrix ordering: simply copies fill pattern */ 1950 if (!levels && perm_identity) { 1951 1952 for (i=0; i<am; i++) { 1953 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 1954 } 1955 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 1956 cols = uj; 1957 for (i=0; i<am; i++) { 1958 aj = a->j + a->diag[i]; 1959 ncols = ui[i+1] - ui[i]; 1960 for (j=0; j<ncols; j++) *cols++ = *aj++; 1961 } 1962 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 1963 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 1964 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 1965 1966 /* initialization */ 1967 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 1968 1969 /* jl: linked list for storing indices of the pivot rows 1970 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 1971 ierr = PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 1972 il = jl + am; 1973 uj_ptr = (PetscInt**)(il + am); 1974 uj_lvl_ptr = (PetscInt**)(uj_ptr + am); 1975 for (i=0; i<am; i++){ 1976 jl[i] = am; il[i] = 0; 1977 } 1978 1979 /* create and initialize a linked list for storing column indices of the active row k */ 1980 nlnk = am + 1; 1981 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1982 1983 /* initial FreeSpace size is fill*(ai[am]+1) */ 1984 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 1985 current_space = free_space; 1986 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 1987 current_space_lvl = free_space_lvl; 1988 1989 for (k=0; k<am; k++){ /* for each active row k */ 1990 /* initialize lnk by the column indices of row rip[k] of A */ 1991 nzk = 0; 1992 ncols = ai[rip[k]+1] - ai[rip[k]]; 1993 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 1994 ncols_upper = 0; 1995 for (j=0; j<ncols; j++){ 1996 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 1997 if (riip[i] >= k){ /* only take upper triangular entry */ 1998 ajtmp[ncols_upper] = i; 1999 ncols_upper++; 2000 } 2001 } 2002 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2003 nzk += nlnk; 2004 2005 /* update lnk by computing fill-in for each pivot row to be merged in */ 2006 prow = jl[k]; /* 1st pivot row */ 2007 2008 while (prow < k){ 2009 nextprow = jl[prow]; 2010 2011 /* merge prow into k-th row */ 2012 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2013 jmax = ui[prow+1]; 2014 ncols = jmax-jmin; 2015 i = jmin - ui[prow]; 2016 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2017 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2018 j = *(uj - 1); 2019 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2020 nzk += nlnk; 2021 2022 /* update il and jl for prow */ 2023 if (jmin < jmax){ 2024 il[prow] = jmin; 2025 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2026 } 2027 prow = nextprow; 2028 } 2029 2030 /* if free space is not available, make more free space */ 2031 if (current_space->local_remaining<nzk) { 2032 i = am - k + 1; /* num of unfactored rows */ 2033 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2034 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2035 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2036 reallocs++; 2037 } 2038 2039 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2040 if (nzk == 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2041 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2042 2043 /* add the k-th row into il and jl */ 2044 if (nzk > 1){ 2045 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2046 jl[k] = jl[i]; jl[i] = k; 2047 il[k] = ui[k] + 1; 2048 } 2049 uj_ptr[k] = current_space->array; 2050 uj_lvl_ptr[k] = current_space_lvl->array; 2051 2052 current_space->array += nzk; 2053 current_space->local_used += nzk; 2054 current_space->local_remaining -= nzk; 2055 2056 current_space_lvl->array += nzk; 2057 current_space_lvl->local_used += nzk; 2058 current_space_lvl->local_remaining -= nzk; 2059 2060 ui[k+1] = ui[k] + nzk; 2061 } 2062 2063 #if defined(PETSC_USE_INFO) 2064 if (ai[am] != 0) { 2065 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2066 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2067 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2068 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2069 } else { 2070 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2071 } 2072 #endif 2073 2074 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2075 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2076 ierr = PetscFree(jl);CHKERRQ(ierr); 2077 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2078 2079 /* destroy list of free space and other temporary array(s) */ 2080 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2081 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2082 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2083 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2084 2085 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2086 2087 /* put together the new matrix in MATSEQSBAIJ format */ 2088 2089 b = (Mat_SeqSBAIJ*)(fact)->data; 2090 b->singlemalloc = PETSC_FALSE; 2091 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2092 b->j = uj; 2093 b->i = ui; 2094 b->diag = 0; 2095 b->ilen = 0; 2096 b->imax = 0; 2097 b->row = perm; 2098 b->col = perm; 2099 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2100 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2101 b->icol = iperm; 2102 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2103 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2104 ierr = PetscLogObjectMemory((fact),(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2105 b->maxnz = b->nz = ui[am]; 2106 b->free_a = PETSC_TRUE; 2107 b->free_ij = PETSC_TRUE; 2108 2109 (fact)->info.factor_mallocs = reallocs; 2110 (fact)->info.fill_ratio_given = fill; 2111 if (ai[am] != 0) { 2112 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2113 } else { 2114 (fact)->info.fill_ratio_needed = 0.0; 2115 } 2116 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2117 PetscFunctionReturn(0); 2118 } 2119 2120 #undef __FUNCT__ 2121 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2122 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2123 { 2124 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2125 Mat_SeqSBAIJ *b; 2126 PetscErrorCode ierr; 2127 PetscTruth perm_identity; 2128 PetscReal fill = info->fill; 2129 const PetscInt *rip,*riip; 2130 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2131 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2132 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2133 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2134 PetscBT lnkbt; 2135 IS iperm; 2136 2137 PetscFunctionBegin; 2138 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2139 /* check whether perm is the identity mapping */ 2140 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2141 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2142 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2143 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2144 2145 /* initialization */ 2146 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2147 ui[0] = 0; 2148 2149 /* jl: linked list for storing indices of the pivot rows 2150 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2151 ierr = PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);CHKERRQ(ierr); 2152 il = jl + am; 2153 cols = il + am; 2154 ui_ptr = (PetscInt**)(cols + am); 2155 for (i=0; i<am; i++){ 2156 jl[i] = am; il[i] = 0; 2157 } 2158 2159 /* create and initialize a linked list for storing column indices of the active row k */ 2160 nlnk = am + 1; 2161 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2162 2163 /* initial FreeSpace size is fill*(ai[am]+1) */ 2164 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2165 current_space = free_space; 2166 2167 for (k=0; k<am; k++){ /* for each active row k */ 2168 /* initialize lnk by the column indices of row rip[k] of A */ 2169 nzk = 0; 2170 ncols = ai[rip[k]+1] - ai[rip[k]]; 2171 if (!ncols) SETERRQ2(PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2172 ncols_upper = 0; 2173 for (j=0; j<ncols; j++){ 2174 i = riip[*(aj + ai[rip[k]] + j)]; 2175 if (i >= k){ /* only take upper triangular entry */ 2176 cols[ncols_upper] = i; 2177 ncols_upper++; 2178 } 2179 } 2180 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2181 nzk += nlnk; 2182 2183 /* update lnk by computing fill-in for each pivot row to be merged in */ 2184 prow = jl[k]; /* 1st pivot row */ 2185 2186 while (prow < k){ 2187 nextprow = jl[prow]; 2188 /* merge prow into k-th row */ 2189 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2190 jmax = ui[prow+1]; 2191 ncols = jmax-jmin; 2192 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2193 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2194 nzk += nlnk; 2195 2196 /* update il and jl for prow */ 2197 if (jmin < jmax){ 2198 il[prow] = jmin; 2199 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2200 } 2201 prow = nextprow; 2202 } 2203 2204 /* if free space is not available, make more free space */ 2205 if (current_space->local_remaining<nzk) { 2206 i = am - k + 1; /* num of unfactored rows */ 2207 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2208 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2209 reallocs++; 2210 } 2211 2212 /* copy data into free space, then initialize lnk */ 2213 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2214 2215 /* add the k-th row into il and jl */ 2216 if (nzk-1 > 0){ 2217 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2218 jl[k] = jl[i]; jl[i] = k; 2219 il[k] = ui[k] + 1; 2220 } 2221 ui_ptr[k] = current_space->array; 2222 current_space->array += nzk; 2223 current_space->local_used += nzk; 2224 current_space->local_remaining -= nzk; 2225 2226 ui[k+1] = ui[k] + nzk; 2227 } 2228 2229 #if defined(PETSC_USE_INFO) 2230 if (ai[am] != 0) { 2231 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 2232 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2233 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2234 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2235 } else { 2236 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2237 } 2238 #endif 2239 2240 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2241 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2242 ierr = PetscFree(jl);CHKERRQ(ierr); 2243 2244 /* destroy list of free space and other temporary array(s) */ 2245 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2246 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2247 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2248 2249 /* put together the new matrix in MATSEQSBAIJ format */ 2250 2251 b = (Mat_SeqSBAIJ*)(fact)->data; 2252 b->singlemalloc = PETSC_FALSE; 2253 b->free_a = PETSC_TRUE; 2254 b->free_ij = PETSC_TRUE; 2255 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2256 b->j = uj; 2257 b->i = ui; 2258 b->diag = 0; 2259 b->ilen = 0; 2260 b->imax = 0; 2261 b->row = perm; 2262 b->col = perm; 2263 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2264 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2265 b->icol = iperm; 2266 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2267 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2268 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2269 b->maxnz = b->nz = ui[am]; 2270 2271 (fact)->info.factor_mallocs = reallocs; 2272 (fact)->info.fill_ratio_given = fill; 2273 if (ai[am] != 0) { 2274 (fact)->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2275 } else { 2276 (fact)->info.fill_ratio_needed = 0.0; 2277 } 2278 (fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2279 PetscFunctionReturn(0); 2280 } 2281 2282 #undef __FUNCT__ 2283 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_newdatastruct" 2284 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_newdatastruct(Mat A,Vec bb,Vec xx) 2285 { 2286 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 PetscInt n = A->rmap->n; 2289 const PetscInt *ai = a->i,*aj = a->j,*vi; 2290 PetscScalar *x,sum; 2291 const PetscScalar *b; 2292 const MatScalar *aa = a->a,*v; 2293 PetscInt i,nz; 2294 2295 PetscFunctionBegin; 2296 if (!n) PetscFunctionReturn(0); 2297 2298 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 2299 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2300 2301 /* forward solve the lower triangular */ 2302 x[0] = b[0]; 2303 v = aa; 2304 vi = aj; 2305 for (i=1; i<n; i++) { 2306 nz = ai[i+1] - ai[i]; 2307 sum = b[i]; 2308 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 2309 /* while (nz--) sum -= *v++ * x[*vi++];*/ 2310 v += nz; 2311 vi += nz; 2312 x[i] = sum; 2313 } 2314 2315 /* backward solve the upper triangular */ 2316 v = aa + ai[n+1]; 2317 vi = aj + ai[n+1]; 2318 for (i=n-1; i>=0; i--){ 2319 nz = ai[2*n-i +1] - ai[2*n-i]-1; 2320 sum = x[i]; 2321 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 2322 /* while (nz--) sum -= *v++ * x[*vi++]; */ 2323 v += nz; 2324 vi += nz; vi++; 2325 x[i] = *v++ *sum; /* x[i]=aa[adiag[i]]*sum; v++; */ 2326 } 2327 2328 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 2329 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 2330 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2331 PetscFunctionReturn(0); 2332 } 2333 2334 #undef __FUNCT__ 2335 #define __FUNCT__ "MatSolve_SeqAIJ_newdatastruct" 2336 PetscErrorCode MatSolve_SeqAIJ_newdatastruct(Mat A,Vec bb,Vec xx) 2337 { 2338 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2339 IS iscol = a->col,isrow = a->row; 2340 PetscErrorCode ierr; 2341 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,nz,k; 2342 const PetscInt *rout,*cout,*r,*c; 2343 PetscScalar *x,*tmp,*tmps,sum; 2344 const PetscScalar *b; 2345 const MatScalar *aa = a->a,*v; 2346 2347 PetscFunctionBegin; 2348 if (!n) PetscFunctionReturn(0); 2349 2350 ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 2351 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2352 tmp = a->solve_work; 2353 2354 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 2355 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 2356 2357 /* forward solve the lower triangular */ 2358 tmp[0] = b[*r++]; 2359 tmps = tmp; 2360 v = aa; 2361 vi = aj; 2362 for (i=1; i<n; i++) { 2363 nz = ai[i+1] - ai[i]; 2364 sum = b[*r++]; 2365 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 2366 tmp[i] = sum; 2367 v += nz; vi += nz; 2368 } 2369 2370 /* backward solve the upper triangular */ 2371 k = n+1; 2372 v = aa + ai[k]; /* 1st entry of U(n-1,:) */ 2373 vi = aj + ai[k]; 2374 for (i=n-1; i>=0; i--){ 2375 k = 2*n-i; 2376 nz = ai[k +1] - ai[k] - 1; 2377 sum = tmp[i]; 2378 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 2379 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 2380 v += nz+1; vi += nz+1; 2381 } 2382 2383 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 2384 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 2385 ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr); 2386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2387 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 2393 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 2394 { 2395 Mat B = *fact; 2396 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 2397 IS isicol; 2398 PetscErrorCode ierr; 2399 const PetscInt *r,*ic; 2400 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 2401 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 2402 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 2403 PetscInt nlnk,*lnk; 2404 PetscBT lnkbt; 2405 PetscTruth row_identity,icol_identity,both_identity; 2406 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 2407 const PetscInt *ics; 2408 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 2409 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftinblocks; 2410 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 2411 PetscTruth missing; 2412 2413 PetscFunctionBegin; 2414 2415 if (dt == PETSC_DEFAULT) dt = 0.005; 2416 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 2417 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 2418 2419 /* ------- symbolic factorization, can be reused ---------*/ 2420 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 2421 if (missing) SETERRQ1(PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 2422 adiag=a->diag; 2423 2424 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 2425 2426 /* bdiag is location of diagonal in factor */ 2427 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 2428 bdiag_rev = bdiag + n+1; 2429 2430 /* allocate row pointers bi */ 2431 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 2432 2433 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 2434 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 2435 nnz_max = ai[n]+2*n*dtcount+2; 2436 2437 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2438 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 2439 2440 /* put together the new matrix */ 2441 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2442 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 2443 b = (Mat_SeqAIJ*)(B)->data; 2444 b->free_a = PETSC_TRUE; 2445 b->free_ij = PETSC_TRUE; 2446 b->singlemalloc = PETSC_FALSE; 2447 b->a = ba; 2448 b->j = bj; 2449 b->i = bi; 2450 b->diag = bdiag; 2451 b->ilen = 0; 2452 b->imax = 0; 2453 b->row = isrow; 2454 b->col = iscol; 2455 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2456 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2457 b->icol = isicol; 2458 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2459 2460 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2461 b->maxnz = nnz_max; 2462 2463 (B)->factor = MAT_FACTOR_ILUDT; 2464 (B)->info.factor_mallocs = 0; 2465 (B)->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 2466 CHKMEMQ; 2467 /* ------- end of symbolic factorization ---------*/ 2468 2469 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2470 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2471 ics = ic; 2472 2473 /* linked list for storing column indices of the active row */ 2474 nlnk = n + 1; 2475 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2476 2477 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 2478 ierr = PetscMalloc((2*n+1)*sizeof(PetscInt),&im);CHKERRQ(ierr); 2479 jtmp = im + n; 2480 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 2481 ierr = PetscMalloc((2*n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2482 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2483 vtmp = rtmp + n; 2484 2485 bi[0] = 0; 2486 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 2487 bdiag_rev[n] = bdiag[0]; 2488 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 2489 for (i=0; i<n; i++) { 2490 /* copy initial fill into linked list */ 2491 nzi = 0; /* nonzeros for active row i */ 2492 nzi = ai[r[i]+1] - ai[r[i]]; 2493 if (!nzi) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 2494 nzi_al = adiag[r[i]] - ai[r[i]]; 2495 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 2496 ajtmp = aj + ai[r[i]]; 2497 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2498 2499 /* load in initial (unfactored row) */ 2500 aatmp = a->a + ai[r[i]]; 2501 for (j=0; j<nzi; j++) { 2502 rtmp[ics[*ajtmp++]] = *aatmp++; 2503 } 2504 2505 /* add pivot rows into linked list */ 2506 row = lnk[n]; 2507 while (row < i ) { 2508 nzi_bl = bi[row+1] - bi[row] + 1; 2509 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 2510 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 2511 nzi += nlnk; 2512 row = lnk[row]; 2513 } 2514 2515 /* copy data from lnk into jtmp, then initialize lnk */ 2516 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 2517 2518 /* numerical factorization */ 2519 bjtmp = jtmp; 2520 row = *bjtmp++; /* 1st pivot row */ 2521 while ( row < i ) { 2522 pc = rtmp + row; 2523 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 2524 multiplier = (*pc) * (*pv); 2525 *pc = multiplier; 2526 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 2527 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2528 pv = ba + bdiag[row+1] + 1; 2529 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 2530 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2531 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2532 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 2533 } 2534 row = *bjtmp++; 2535 } 2536 2537 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 2538 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 2539 nzi_bl = 0; j = 0; 2540 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 2541 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 2542 nzi_bl++; j++; 2543 } 2544 nzi_bu = nzi - nzi_bl -1; 2545 while (j < nzi){ 2546 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 2547 j++; 2548 } 2549 2550 bjtmp = bj + bi[i]; 2551 batmp = ba + bi[i]; 2552 /* apply level dropping rule to L part */ 2553 ncut = nzi_al + dtcount; 2554 if (ncut < nzi_bl){ 2555 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 2556 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 2557 } else { 2558 ncut = nzi_bl; 2559 } 2560 for (j=0; j<ncut; j++){ 2561 bjtmp[j] = jtmp[j]; 2562 batmp[j] = vtmp[j]; 2563 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 2564 } 2565 bi[i+1] = bi[i] + ncut; 2566 nzi = ncut + 1; 2567 2568 /* apply level dropping rule to U part */ 2569 ncut = nzi_au + dtcount; 2570 if (ncut < nzi_bu){ 2571 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 2572 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 2573 } else { 2574 ncut = nzi_bu; 2575 } 2576 nzi += ncut; 2577 2578 /* mark bdiagonal */ 2579 bdiag[i+1] = bdiag[i] - (ncut + 1); 2580 bdiag_rev[n-i-1] = bdiag[i+1]; 2581 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 2582 bjtmp = bj + bdiag[i]; 2583 batmp = ba + bdiag[i]; 2584 *bjtmp = i; 2585 *batmp = diag_tmp; /* rtmp[i]; */ 2586 if (*batmp == 0.0) { 2587 *batmp = dt+shift; 2588 /* printf(" row %d add shift %g\n",i,shift); */ 2589 } 2590 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 2591 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 2592 2593 bjtmp = bj + bdiag[i+1]+1; 2594 batmp = ba + bdiag[i+1]+1; 2595 for (k=0; k<ncut; k++){ 2596 bjtmp[k] = jtmp[nzi_bl+1+k]; 2597 batmp[k] = vtmp[nzi_bl+1+k]; 2598 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 2599 } 2600 /* printf("\n"); */ 2601 2602 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 2603 /* 2604 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 2605 printf(" ----------------------------\n"); 2606 */ 2607 } /* for (i=0; i<n; i++) */ 2608 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 2609 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 2610 2611 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2612 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2613 2614 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2615 ierr = PetscFree(im);CHKERRQ(ierr); 2616 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2617 2618 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 2619 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 2620 2621 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2622 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 2623 both_identity = (PetscTruth) (row_identity && icol_identity); 2624 if (row_identity && icol_identity) { 2625 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 2626 } else { 2627 B->ops->solve = MatSolve_SeqAIJ_newdatastruct; 2628 } 2629 2630 B->ops->lufactorsymbolic = MatILUDTFactorSymbolic_SeqAIJ; 2631 B->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 2632 B->ops->solveadd = 0; 2633 B->ops->solvetranspose = 0; 2634 B->ops->solvetransposeadd = 0; 2635 B->ops->matsolve = 0; 2636 B->assembled = PETSC_TRUE; 2637 B->preallocated = PETSC_TRUE; 2638 PetscFunctionReturn(0); 2639 } 2640 2641 /* a wraper of MatILUDTFactor_SeqAIJ() */ 2642 #undef __FUNCT__ 2643 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 2644 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 2645 { 2646 PetscErrorCode ierr; 2647 2648 PetscFunctionBegin; 2649 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 2650 2651 fact->ops->lufactornumeric = MatILUDTFactorNumeric_SeqAIJ; 2652 PetscFunctionReturn(0); 2653 } 2654 2655 /* 2656 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 2657 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 2658 */ 2659 #undef __FUNCT__ 2660 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 2661 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 2662 { 2663 Mat C=fact; 2664 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 2665 IS isrow = b->row,isicol = b->icol; 2666 PetscErrorCode ierr; 2667 const PetscInt *r,*ic,*ics; 2668 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 2669 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 2670 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 2671 PetscReal dt=info->dt,shift=info->shiftinblocks; 2672 PetscTruth row_identity, col_identity; 2673 2674 PetscFunctionBegin; 2675 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2676 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 2677 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 2678 ics = ic; 2679 2680 for (i=0; i<n; i++){ 2681 /* initialize rtmp array */ 2682 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 2683 bjtmp = bj + bi[i]; 2684 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 2685 rtmp[i] = 0.0; 2686 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 2687 bjtmp = bj + bdiag[i+1] + 1; 2688 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 2689 2690 /* load in initial unfactored row of A */ 2691 /* printf("row %d\n",i); */ 2692 nz = ai[r[i]+1] - ai[r[i]]; 2693 ajtmp = aj + ai[r[i]]; 2694 v = aa + ai[r[i]]; 2695 for (j=0; j<nz; j++) { 2696 rtmp[ics[*ajtmp++]] = v[j]; 2697 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 2698 } 2699 /* printf("\n"); */ 2700 2701 /* numerical factorization */ 2702 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 2703 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 2704 k = 0; 2705 while (k < nzl){ 2706 row = *bjtmp++; 2707 /* printf(" prow %d\n",row); */ 2708 pc = rtmp + row; 2709 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 2710 multiplier = (*pc) * (*pv); 2711 *pc = multiplier; 2712 if (PetscAbsScalar(multiplier) > dt){ 2713 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 2714 pv = b->a + bdiag[row+1] + 1; 2715 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 2716 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 2717 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 2718 } 2719 k++; 2720 } 2721 2722 /* finished row so stick it into b->a */ 2723 /* L-part */ 2724 pv = b->a + bi[i] ; 2725 pj = bj + bi[i] ; 2726 nzl = bi[i+1] - bi[i]; 2727 for (j=0; j<nzl; j++) { 2728 pv[j] = rtmp[pj[j]]; 2729 /* printf(" (%d,%g),",pj[j],pv[j]); */ 2730 } 2731 2732 /* diagonal: invert diagonal entries for simplier triangular solves */ 2733 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 2734 b->a[bdiag[i]] = 1.0/rtmp[i]; 2735 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 2736 2737 /* U-part */ 2738 pv = b->a + bdiag[i+1] + 1; 2739 pj = bj + bdiag[i+1] + 1; 2740 nzu = bdiag[i] - bdiag[i+1] - 1; 2741 for (j=0; j<nzu; j++) { 2742 pv[j] = rtmp[pj[j]]; 2743 /* printf(" (%d,%g),",pj[j],pv[j]); */ 2744 } 2745 /* printf("\n"); */ 2746 } 2747 2748 ierr = PetscFree(rtmp);CHKERRQ(ierr); 2749 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2750 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2751 2752 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 2753 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 2754 if (row_identity && col_identity) { 2755 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_newdatastruct; 2756 } else { 2757 C->ops->solve = MatSolve_SeqAIJ_newdatastruct; 2758 } 2759 C->ops->solveadd = 0; 2760 C->ops->solvetranspose = 0; 2761 C->ops->solvetransposeadd = 0; 2762 C->ops->matsolve = 0; 2763 C->assembled = PETSC_TRUE; 2764 C->preallocated = PETSC_TRUE; 2765 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 2766 PetscFunctionReturn(0); 2767 } 2768