1 #define PETSCMAT_DLL 2 3 4 #include "../src/mat/impls/aij/seq/aij.h" 5 #include "../src/mat/impls/sbaij/seq/sbaij.h" 6 #include "petscbt.h" 7 #include "../src/mat/utils/freespace.h" 8 9 EXTERN_C_BEGIN 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 12 /* 13 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 14 15 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 16 */ 17 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 18 { 19 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 20 PetscErrorCode ierr; 21 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 22 const PetscInt *ai = a->i, *aj = a->j; 23 const PetscScalar *aa = a->a; 24 PetscTruth *done; 25 PetscReal best,past = 0,future; 26 27 PetscFunctionBegin; 28 /* pick initial row */ 29 best = -1; 30 for (i=0; i<n; i++) { 31 future = 0.0; 32 for (j=ai[i]; j<ai[i+1]; j++) { 33 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 34 } 35 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 36 if (past/future > best) { 37 best = past/future; 38 current = i; 39 } 40 } 41 42 ierr = PetscMalloc(n*sizeof(PetscTruth),&done);CHKERRQ(ierr); 43 ierr = PetscMemzero(done,n*sizeof(PetscTruth));CHKERRQ(ierr); 44 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 45 order[0] = current; 46 for (i=0; i<n-1; i++) { 47 done[current] = PETSC_TRUE; 48 best = -1; 49 /* loop over all neighbors of current pivot */ 50 for (j=ai[current]; j<ai[current+1]; j++) { 51 jj = aj[j]; 52 if (done[jj]) continue; 53 /* loop over columns of potential next row computing weights for below and above diagonal */ 54 past = future = 0.0; 55 for (k=ai[jj]; k<ai[jj+1]; k++) { 56 kk = aj[k]; 57 if (done[kk]) past += PetscAbsScalar(aa[k]); 58 else if (kk != jj) future += PetscAbsScalar(aa[k]); 59 } 60 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 61 if (past/future > best) { 62 best = past/future; 63 newcurrent = jj; 64 } 65 } 66 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 67 best = -1; 68 for (k=0; k<n; k++) { 69 if (done[k]) continue; 70 future = 0.0; 71 past = 0.0; 72 for (j=ai[k]; j<ai[k+1]; j++) { 73 kk = aj[j]; 74 if (done[kk]) past += PetscAbsScalar(aa[j]); 75 else if (kk != k) future += PetscAbsScalar(aa[j]); 76 } 77 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 78 if (past/future > best) { 79 best = past/future; 80 newcurrent = k; 81 } 82 } 83 } 84 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 85 current = newcurrent; 86 order[i+1] = current; 87 } 88 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,irow);CHKERRQ(ierr); 89 *icol = *irow; 90 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 91 ierr = PetscFree(done);CHKERRQ(ierr); 92 ierr = PetscFree(order);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 EXTERN_C_END 96 97 EXTERN_C_BEGIN 98 #undef __FUNCT__ 99 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 100 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscTruth *flg) 101 { 102 PetscFunctionBegin; 103 *flg = PETSC_TRUE; 104 PetscFunctionReturn(0); 105 } 106 EXTERN_C_END 107 108 EXTERN_C_BEGIN 109 #undef __FUNCT__ 110 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 111 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 112 { 113 PetscInt n = A->rmap->n; 114 PetscErrorCode ierr; 115 116 PetscFunctionBegin; 117 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 118 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 119 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 120 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 121 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 122 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 123 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 124 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 125 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 126 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 127 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 129 (*B)->factortype = ftype; 130 PetscFunctionReturn(0); 131 } 132 EXTERN_C_END 133 134 #undef __FUNCT__ 135 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 136 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 137 { 138 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 139 IS isicol; 140 PetscErrorCode ierr; 141 const PetscInt *r,*ic; 142 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 143 PetscInt *bi,*bj,*ajtmp; 144 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 145 PetscReal f; 146 PetscInt nlnk,*lnk,k,**bi_ptr; 147 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 148 PetscBT lnkbt; 149 150 PetscFunctionBegin; 151 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 152 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 153 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 154 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 155 156 /* get new row pointers */ 157 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 158 bi[0] = 0; 159 160 /* bdiag is location of diagonal in factor */ 161 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 162 bdiag[0] = 0; 163 164 /* linked list for storing column indices of the active row */ 165 nlnk = n + 1; 166 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 167 168 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 169 170 /* initial FreeSpace size is f*(ai[n]+1) */ 171 f = info->fill; 172 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 173 current_space = free_space; 174 175 for (i=0; i<n; i++) { 176 /* copy previous fill into linked list */ 177 nzi = 0; 178 nnz = ai[r[i]+1] - ai[r[i]]; 179 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 180 ajtmp = aj + ai[r[i]]; 181 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 182 nzi += nlnk; 183 184 /* add pivot rows into linked list */ 185 row = lnk[n]; 186 while (row < i) { 187 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 188 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 189 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 190 nzi += nlnk; 191 row = lnk[row]; 192 } 193 bi[i+1] = bi[i] + nzi; 194 im[i] = nzi; 195 196 /* mark bdiag */ 197 nzbd = 0; 198 nnz = nzi; 199 k = lnk[n]; 200 while (nnz-- && k < i){ 201 nzbd++; 202 k = lnk[k]; 203 } 204 bdiag[i] = bi[i] + nzbd; 205 206 /* if free space is not available, make more free space */ 207 if (current_space->local_remaining<nzi) { 208 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 209 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 210 reallocs++; 211 } 212 213 /* copy data into free space, then initialize lnk */ 214 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 215 bi_ptr[i] = current_space->array; 216 current_space->array += nzi; 217 current_space->local_used += nzi; 218 current_space->local_remaining -= nzi; 219 } 220 #if defined(PETSC_USE_INFO) 221 if (ai[n] != 0) { 222 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 223 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 224 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 225 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 226 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 227 } else { 228 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 229 } 230 #endif 231 232 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 233 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 234 235 /* destroy list of free space and other temporary array(s) */ 236 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 237 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 238 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 239 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 240 241 /* put together the new matrix */ 242 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 243 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 244 b = (Mat_SeqAIJ*)(B)->data; 245 b->free_a = PETSC_TRUE; 246 b->free_ij = PETSC_TRUE; 247 b->singlemalloc = PETSC_FALSE; 248 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 249 b->j = bj; 250 b->i = bi; 251 b->diag = bdiag; 252 b->ilen = 0; 253 b->imax = 0; 254 b->row = isrow; 255 b->col = iscol; 256 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 257 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 258 b->icol = isicol; 259 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 260 261 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 262 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 263 b->maxnz = b->nz = bi[n] ; 264 265 (B)->factortype = MAT_FACTOR_LU; 266 (B)->info.factor_mallocs = reallocs; 267 (B)->info.fill_ratio_given = f; 268 269 if (ai[n]) { 270 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 271 } else { 272 (B)->info.fill_ratio_needed = 0.0; 273 } 274 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 275 if (a->inode.size) { 276 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 277 } 278 PetscFunctionReturn(0); 279 } 280 281 #undef __FUNCT__ 282 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 283 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 284 { 285 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 286 IS isicol; 287 PetscErrorCode ierr; 288 const PetscInt *r,*ic; 289 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 290 PetscInt *bi,*bj,*ajtmp; 291 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 292 PetscReal f; 293 PetscInt nlnk,*lnk,k,**bi_ptr; 294 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 295 PetscBT lnkbt; 296 PetscTruth olddatastruct=PETSC_FALSE; 297 298 PetscFunctionBegin; 299 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 300 ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 301 if(olddatastruct){ 302 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 306 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 307 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 308 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 309 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 313 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 319 320 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 325 current_space = free_space; 326 327 for (i=0; i<n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i]+1] - ai[r[i]]; 331 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 332 ajtmp = aj + ai[r[i]]; 333 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i){ 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i+1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i){ 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining<nzi) { 360 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 361 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 367 bi_ptr[i] = current_space->array; 368 current_space->array += nzi; 369 current_space->local_used += nzi; 370 current_space->local_remaining -= nzi; 371 } 372 #if defined(PETSC_USE_INFO) 373 if (ai[n] != 0) { 374 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 375 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 376 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 377 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 378 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 379 } else { 380 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 381 } 382 #endif 383 384 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 385 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 386 387 /* destroy list of free space and other temporary array(s) */ 388 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 389 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 390 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 391 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 392 393 /* put together the new matrix */ 394 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 395 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 396 b = (Mat_SeqAIJ*)(B)->data; 397 b->free_a = PETSC_TRUE; 398 b->free_ij = PETSC_TRUE; 399 b->singlemalloc = PETSC_FALSE; 400 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 401 b->j = bj; 402 b->i = bi; 403 b->diag = bdiag; 404 b->ilen = 0; 405 b->imax = 0; 406 b->row = isrow; 407 b->col = iscol; 408 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 409 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 410 b->icol = isicol; 411 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 412 413 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 414 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 415 b->maxnz = b->nz = bdiag[0]+1; 416 B->factortype = MAT_FACTOR_LU; 417 B->info.factor_mallocs = reallocs; 418 B->info.fill_ratio_given = f; 419 420 if (ai[n]) { 421 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 422 } else { 423 B->info.fill_ratio_needed = 0.0; 424 } 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size) { 427 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 428 } 429 PetscFunctionReturn(0); 430 } 431 432 /* 433 Trouble in factorization, should we dump the original matrix? 434 */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatFactorDumpMatrix" 437 PetscErrorCode MatFactorDumpMatrix(Mat A) 438 { 439 PetscErrorCode ierr; 440 PetscTruth flg = PETSC_FALSE; 441 442 PetscFunctionBegin; 443 ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 444 if (flg) { 445 PetscViewer viewer; 446 char filename[PETSC_MAX_PATH_LEN]; 447 448 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 449 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 450 ierr = MatView(A,viewer);CHKERRQ(ierr); 451 ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr); 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 459 { 460 Mat C=B; 461 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 462 IS isrow = b->row,isicol = b->icol; 463 PetscErrorCode ierr; 464 const PetscInt *r,*ic,*ics; 465 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 466 PetscInt i,j,k,nz,nzL,row,*pj; 467 const PetscInt *ajtmp,*bjtmp; 468 MatScalar *rtmp,*pc,multiplier,*pv; 469 const MatScalar *aa=a->a,*v; 470 PetscTruth row_identity,col_identity; 471 FactorShiftCtx sctx; 472 const PetscInt *ddiag; 473 PetscReal rs; 474 MatScalar d; 475 476 PetscFunctionBegin; 477 /* MatPivotSetUp(): initialize shift context sctx */ 478 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 479 480 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 481 ddiag = a->diag; 482 sctx.shift_top = info->zeropivot; 483 for (i=0; i<n; i++) { 484 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 485 d = (aa)[ddiag[i]]; 486 rs = -PetscAbsScalar(d) - PetscRealPart(d); 487 v = aa+ai[i]; 488 nz = ai[i+1] - ai[i]; 489 for (j=0; j<nz; j++) 490 rs += PetscAbsScalar(v[j]); 491 if (rs>sctx.shift_top) sctx.shift_top = rs; 492 } 493 sctx.shift_top *= 1.1; 494 sctx.nshift_max = 5; 495 sctx.shift_lo = 0.; 496 sctx.shift_hi = 1.; 497 } 498 499 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 500 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 501 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 502 ics = ic; 503 504 do { 505 sctx.newshift = PETSC_FALSE; 506 for (i=0; i<n; i++){ 507 /* zero rtmp */ 508 /* L part */ 509 nz = bi[i+1] - bi[i]; 510 bjtmp = bj + bi[i]; 511 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* U part */ 514 nz = bdiag[i]-bdiag[i+1]; 515 bjtmp = bj + bdiag[i+1]+1; 516 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* load in initial (unfactored row) */ 519 nz = ai[r[i]+1] - ai[r[i]]; 520 ajtmp = aj + ai[r[i]]; 521 v = aa + ai[r[i]]; 522 for (j=0; j<nz; j++) { 523 rtmp[ics[ajtmp[j]]] = v[j]; 524 } 525 /* ZeropivotApply() */ 526 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 527 528 /* elimination */ 529 bjtmp = bj + bi[i]; 530 row = *bjtmp++; 531 nzL = bi[i+1] - bi[i]; 532 for(k=0; k < nzL;k++) { 533 pc = rtmp + row; 534 if (*pc != 0.0) { 535 pv = b->a + bdiag[row]; 536 multiplier = *pc * (*pv); 537 *pc = multiplier; 538 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 539 pv = b->a + bdiag[row+1]+1; 540 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 541 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i] ; 551 pj = b->j + bi[i] ; 552 nz = bi[i+1] - bi[i]; 553 for (j=0; j<nz; j++) { 554 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 555 } 556 557 /* U part */ 558 pv = b->a + bdiag[i+1]+1; 559 pj = b->j + bdiag[i+1]+1; 560 nz = bdiag[i] - bdiag[i+1]-1; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 563 } 564 565 sctx.rs = rs; 566 sctx.pv = rtmp[i]; 567 ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr); 568 if(sctx.newshift) break; /* break for-loop */ 569 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 570 571 /* Mark diagonal and invert diagonal for simplier triangular solves */ 572 pv = b->a + bdiag[i]; 573 *pv = 1.0/rtmp[i]; 574 575 } /* endof for (i=0; i<n; i++){ */ 576 577 /* MatPivotRefine() */ 578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 579 /* 580 * if no shift in this attempt & shifting & started shifting & can refine, 581 * then try lower shift 582 */ 583 sctx.shift_hi = sctx.shift_fraction; 584 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 585 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 586 sctx.newshift = PETSC_TRUE; 587 sctx.nshift++; 588 } 589 } while (sctx.newshift); 590 591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 594 595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 597 if (row_identity && col_identity) { 598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 599 } else { 600 C->ops->solve = MatSolve_SeqAIJ; 601 } 602 C->ops->solveadd = MatSolveAdd_SeqAIJ; 603 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 604 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 605 C->ops->matsolve = MatMatSolve_SeqAIJ; 606 C->assembled = PETSC_TRUE; 607 C->preallocated = PETSC_TRUE; 608 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 609 610 /* MatShiftView(A,info,&sctx) */ 611 if (sctx.nshift){ 612 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 613 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 615 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 616 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 617 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 618 } 619 } 620 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 627 { 628 Mat C=B; 629 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 630 IS isrow = b->row,isicol = b->icol; 631 PetscErrorCode ierr; 632 const PetscInt *r,*ic,*ics; 633 PetscInt nz,row,i,j,n=A->rmap->n,diag; 634 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 635 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 636 MatScalar *pv,*rtmp,*pc,multiplier,d; 637 const MatScalar *v,*aa=a->a; 638 PetscReal rs=0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscTruth row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 646 647 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i=0; i<n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa+ai[i]; 655 nz = ai[i+1] - ai[i]; 656 for (j=0; j<nz; j++) 657 rs += PetscAbsScalar(v[j]); 658 if (rs>sctx.shift_top) sctx.shift_top = rs; 659 } 660 sctx.shift_top *= 1.1; 661 sctx.nshift_max = 5; 662 sctx.shift_lo = 0.; 663 sctx.shift_hi = 1.; 664 } 665 666 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 667 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 668 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 669 ics = ic; 670 671 do { 672 sctx.newshift = PETSC_FALSE; 673 for (i=0; i<n; i++){ 674 nz = bi[i+1] - bi[i]; 675 bjtmp = bj + bi[i]; 676 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 677 678 /* load in initial (unfactored row) */ 679 nz = ai[r[i]+1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = aa + ai[r[i]]; 682 for (j=0; j<nz; j++) { 683 rtmp[ics[ajtmp[j]]] = v[j]; 684 } 685 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 686 687 row = *bjtmp++; 688 while (row < i) { 689 pc = rtmp + row; 690 if (*pc != 0.0) { 691 pv = b->a + diag_offset[row]; 692 pj = b->j + diag_offset[row] + 1; 693 multiplier = *pc / *pv++; 694 *pc = multiplier; 695 nz = bi[row+1] - diag_offset[row] - 1; 696 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 697 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 698 } 699 row = *bjtmp++; 700 } 701 /* finished row so stick it into b->a */ 702 pv = b->a + bi[i] ; 703 pj = b->j + bi[i] ; 704 nz = bi[i+1] - bi[i]; 705 diag = diag_offset[i] - bi[i]; 706 rs = 0.0; 707 for (j=0; j<nz; j++) { 708 pv[j] = rtmp[pj[j]]; 709 rs += PetscAbsScalar(pv[j]); 710 } 711 rs -= PetscAbsScalar(pv[diag]); 712 713 sctx.rs = rs; 714 sctx.pv = pv[diag]; 715 ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr); 716 if (sctx.newshift) break; 717 pv[diag] = sctx.pv; 718 } 719 720 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 721 /* 722 * if no shift in this attempt & shifting & started shifting & can refine, 723 * then try lower shift 724 */ 725 sctx.shift_hi = sctx.shift_fraction; 726 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 727 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 728 sctx.newshift = PETSC_TRUE; 729 sctx.nshift++; 730 } 731 } while (sctx.newshift); 732 733 /* invert diagonal entries for simplier triangular solves */ 734 for (i=0; i<n; i++) { 735 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 736 } 737 ierr = PetscFree(rtmp);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 740 741 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 742 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 743 if (row_identity && col_identity) { 744 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 745 } else { 746 C->ops->solve = MatSolve_SeqAIJ_inplace; 747 } 748 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 752 C->assembled = PETSC_TRUE; 753 C->preallocated = PETSC_TRUE; 754 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 755 if (sctx.nshift){ 756 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 757 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 758 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 759 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 760 } 761 } 762 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 763 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 764 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 /* 769 This routine implements inplace ILU(0) with row or/and column permutations. 770 Input: 771 A - original matrix 772 Output; 773 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 774 a->j (col index) is permuted by the inverse of colperm, then sorted 775 a->a reordered accordingly with a->j 776 a->diag (ptr to diagonal elements) is updated. 777 */ 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 781 { 782 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 783 IS isrow = a->row,isicol = a->icol; 784 PetscErrorCode ierr; 785 const PetscInt *r,*ic,*ics; 786 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 787 PetscInt *ajtmp,nz,row; 788 PetscInt *diag = a->diag,nbdiag,*pj; 789 PetscScalar *rtmp,*pc,multiplier,d; 790 MatScalar *pv,*v; 791 PetscReal rs; 792 FactorShiftCtx sctx; 793 const MatScalar *aa=a->a,*vtmp; 794 795 PetscFunctionBegin; 796 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 797 798 /* MatPivotSetUp(): initialize shift context sctx */ 799 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 800 801 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 802 const PetscInt *ddiag = a->diag; 803 sctx.shift_top = info->zeropivot; 804 for (i=0; i<n; i++) { 805 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 806 d = (aa)[ddiag[i]]; 807 rs = -PetscAbsScalar(d) - PetscRealPart(d); 808 vtmp = aa+ai[i]; 809 nz = ai[i+1] - ai[i]; 810 for (j=0; j<nz; j++) 811 rs += PetscAbsScalar(vtmp[j]); 812 if (rs>sctx.shift_top) sctx.shift_top = rs; 813 } 814 sctx.shift_top *= 1.1; 815 sctx.nshift_max = 5; 816 sctx.shift_lo = 0.; 817 sctx.shift_hi = 1.; 818 } 819 820 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 821 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 822 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 823 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 824 ics = ic; 825 826 #if defined(MV) 827 sctx.shift_top = 0.; 828 sctx.nshift_max = 0; 829 sctx.shift_lo = 0.; 830 sctx.shift_hi = 0.; 831 sctx.shift_fraction = 0.; 832 833 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 834 sctx.shift_top = 0.; 835 for (i=0; i<n; i++) { 836 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 837 d = (a->a)[diag[i]]; 838 rs = -PetscAbsScalar(d) - PetscRealPart(d); 839 v = a->a+ai[i]; 840 nz = ai[i+1] - ai[i]; 841 for (j=0; j<nz; j++) 842 rs += PetscAbsScalar(v[j]); 843 if (rs>sctx.shift_top) sctx.shift_top = rs; 844 } 845 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 846 sctx.shift_top *= 1.1; 847 sctx.nshift_max = 5; 848 sctx.shift_lo = 0.; 849 sctx.shift_hi = 1.; 850 } 851 852 sctx.shift_amount = 0.; 853 sctx.nshift = 0; 854 #endif 855 856 do { 857 sctx.newshift = PETSC_FALSE; 858 for (i=0; i<n; i++){ 859 /* load in initial unfactored row */ 860 nz = ai[r[i]+1] - ai[r[i]]; 861 ajtmp = aj + ai[r[i]]; 862 v = a->a + ai[r[i]]; 863 /* sort permuted ajtmp and values v accordingly */ 864 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 865 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 866 867 diag[r[i]] = ai[r[i]]; 868 for (j=0; j<nz; j++) { 869 rtmp[ajtmp[j]] = v[j]; 870 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 871 } 872 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 873 874 row = *ajtmp++; 875 while (row < i) { 876 pc = rtmp + row; 877 if (*pc != 0.0) { 878 pv = a->a + diag[r[row]]; 879 pj = aj + diag[r[row]] + 1; 880 881 multiplier = *pc / *pv++; 882 *pc = multiplier; 883 nz = ai[r[row]+1] - diag[r[row]] - 1; 884 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 885 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 886 } 887 row = *ajtmp++; 888 } 889 /* finished row so overwrite it onto a->a */ 890 pv = a->a + ai[r[i]] ; 891 pj = aj + ai[r[i]] ; 892 nz = ai[r[i]+1] - ai[r[i]]; 893 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 894 895 rs = 0.0; 896 for (j=0; j<nz; j++) { 897 pv[j] = rtmp[pj[j]]; 898 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 899 } 900 901 sctx.rs = rs; 902 sctx.pv = pv[nbdiag]; 903 ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr); 904 if (sctx.newshift) break; 905 pv[nbdiag] = sctx.pv; 906 } 907 908 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 909 /* 910 * if no shift in this attempt & shifting & started shifting & can refine, 911 * then try lower shift 912 */ 913 sctx.shift_hi = sctx.shift_fraction; 914 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 915 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 916 sctx.newshift = PETSC_TRUE; 917 sctx.nshift++; 918 } 919 } while (sctx.newshift); 920 921 /* invert diagonal entries for simplier triangular solves */ 922 for (i=0; i<n; i++) { 923 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 924 } 925 926 ierr = PetscFree(rtmp);CHKERRQ(ierr); 927 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 928 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 929 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 930 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 931 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 932 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 933 A->assembled = PETSC_TRUE; 934 A->preallocated = PETSC_TRUE; 935 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 936 if (sctx.nshift){ 937 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 938 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 939 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 940 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 941 } 942 } 943 PetscFunctionReturn(0); 944 } 945 946 /* ----------------------------------------------------------- */ 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatLUFactor_SeqAIJ" 949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 950 { 951 PetscErrorCode ierr; 952 Mat C; 953 954 PetscFunctionBegin; 955 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 956 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 957 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 958 A->ops->solve = C->ops->solve; 959 A->ops->solvetranspose = C->ops->solvetranspose; 960 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 961 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 /* ----------------------------------------------------------- */ 965 966 967 #undef __FUNCT__ 968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 970 { 971 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 972 IS iscol = a->col,isrow = a->row; 973 PetscErrorCode ierr; 974 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 975 PetscInt nz; 976 const PetscInt *rout,*cout,*r,*c; 977 PetscScalar *x,*tmp,*tmps,sum; 978 const PetscScalar *b; 979 const MatScalar *aa = a->a,*v; 980 981 PetscFunctionBegin; 982 if (!n) PetscFunctionReturn(0); 983 984 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 tmp = a->solve_work; 987 988 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 989 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 990 991 /* forward solve the lower triangular */ 992 tmp[0] = b[*r++]; 993 tmps = tmp; 994 for (i=1; i<n; i++) { 995 v = aa + ai[i] ; 996 vi = aj + ai[i] ; 997 nz = a->diag[i] - ai[i]; 998 sum = b[*r++]; 999 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1000 tmp[i] = sum; 1001 } 1002 1003 /* backward solve the upper triangular */ 1004 for (i=n-1; i>=0; i--){ 1005 v = aa + a->diag[i] + 1; 1006 vi = aj + a->diag[i] + 1; 1007 nz = ai[i+1] - a->diag[i] - 1; 1008 sum = tmp[i]; 1009 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1010 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1011 } 1012 1013 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1014 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1015 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1017 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1024 { 1025 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1026 IS iscol = a->col,isrow = a->row; 1027 PetscErrorCode ierr; 1028 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1029 PetscInt nz,neq; 1030 const PetscInt *rout,*cout,*r,*c; 1031 PetscScalar *x,*b,*tmp,*tmps,sum; 1032 const MatScalar *aa = a->a,*v; 1033 PetscTruth bisdense,xisdense; 1034 1035 PetscFunctionBegin; 1036 if (!n) PetscFunctionReturn(0); 1037 1038 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1039 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1040 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1041 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1042 1043 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1044 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1045 1046 tmp = a->solve_work; 1047 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1048 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1049 1050 for (neq=0; neq<B->cmap->n; neq++){ 1051 /* forward solve the lower triangular */ 1052 tmp[0] = b[r[0]]; 1053 tmps = tmp; 1054 for (i=1; i<n; i++) { 1055 v = aa + ai[i] ; 1056 vi = aj + ai[i] ; 1057 nz = a->diag[i] - ai[i]; 1058 sum = b[r[i]]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 tmp[i] = sum; 1061 } 1062 /* backward solve the upper triangular */ 1063 for (i=n-1; i>=0; i--){ 1064 v = aa + a->diag[i] + 1; 1065 vi = aj + a->diag[i] + 1; 1066 nz = ai[i+1] - a->diag[i] - 1; 1067 sum = tmp[i]; 1068 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1069 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1070 } 1071 1072 b += n; 1073 x += n; 1074 } 1075 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1076 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1077 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1078 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1079 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1086 { 1087 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1088 IS iscol = a->col,isrow = a->row; 1089 PetscErrorCode ierr; 1090 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1091 PetscInt nz,neq; 1092 const PetscInt *rout,*cout,*r,*c; 1093 PetscScalar *x,*b,*tmp,sum; 1094 const MatScalar *aa = a->a,*v; 1095 PetscTruth bisdense,xisdense; 1096 1097 PetscFunctionBegin; 1098 if (!n) PetscFunctionReturn(0); 1099 1100 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1101 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1102 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1103 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1104 1105 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1106 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1107 1108 tmp = a->solve_work; 1109 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1110 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1111 1112 for (neq=0; neq<B->cmap->n; neq++){ 1113 /* forward solve the lower triangular */ 1114 tmp[0] = b[r[0]]; 1115 v = aa; 1116 vi = aj; 1117 for (i=1; i<n; i++) { 1118 nz = ai[i+1] - ai[i]; 1119 sum = b[r[i]]; 1120 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1121 tmp[i] = sum; 1122 v += nz; vi += nz; 1123 } 1124 1125 /* backward solve the upper triangular */ 1126 for (i=n-1; i>=0; i--){ 1127 v = aa + adiag[i+1]+1; 1128 vi = aj + adiag[i+1]+1; 1129 nz = adiag[i]-adiag[i+1]-1; 1130 sum = tmp[i]; 1131 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1132 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1133 } 1134 1135 b += n; 1136 x += n; 1137 } 1138 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1139 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1140 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1141 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1151 IS iscol = a->col,isrow = a->row; 1152 PetscErrorCode ierr; 1153 const PetscInt *r,*c,*rout,*cout; 1154 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1155 PetscInt nz,row; 1156 PetscScalar *x,*b,*tmp,*tmps,sum; 1157 const MatScalar *aa = a->a,*v; 1158 1159 PetscFunctionBegin; 1160 if (!n) PetscFunctionReturn(0); 1161 1162 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1163 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1164 tmp = a->solve_work; 1165 1166 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1167 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1168 1169 /* forward solve the lower triangular */ 1170 tmp[0] = b[*r++]; 1171 tmps = tmp; 1172 for (row=1; row<n; row++) { 1173 i = rout[row]; /* permuted row */ 1174 v = aa + ai[i] ; 1175 vi = aj + ai[i] ; 1176 nz = a->diag[i] - ai[i]; 1177 sum = b[*r++]; 1178 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1179 tmp[row] = sum; 1180 } 1181 1182 /* backward solve the upper triangular */ 1183 for (row=n-1; row>=0; row--){ 1184 i = rout[row]; /* permuted row */ 1185 v = aa + a->diag[i] + 1; 1186 vi = aj + a->diag[i] + 1; 1187 nz = ai[i+1] - a->diag[i] - 1; 1188 sum = tmp[row]; 1189 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1190 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1191 } 1192 1193 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1194 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1196 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1197 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 /* ----------------------------------------------------------- */ 1202 #include "../src/mat/impls/aij/seq/ftn-kernels/fsolve.h" 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1206 { 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1208 PetscErrorCode ierr; 1209 PetscInt n = A->rmap->n; 1210 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1211 PetscScalar *x; 1212 const PetscScalar *b; 1213 const MatScalar *aa = a->a; 1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1215 PetscInt adiag_i,i,nz,ai_i; 1216 const PetscInt *vi; 1217 const MatScalar *v; 1218 PetscScalar sum; 1219 #endif 1220 1221 PetscFunctionBegin; 1222 if (!n) PetscFunctionReturn(0); 1223 1224 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1225 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1226 1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1228 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1229 #else 1230 /* forward solve the lower triangular */ 1231 x[0] = b[0]; 1232 for (i=1; i<n; i++) { 1233 ai_i = ai[i]; 1234 v = aa + ai_i; 1235 vi = aj + ai_i; 1236 nz = adiag[i] - ai_i; 1237 sum = b[i]; 1238 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1239 x[i] = sum; 1240 } 1241 1242 /* backward solve the upper triangular */ 1243 for (i=n-1; i>=0; i--){ 1244 adiag_i = adiag[i]; 1245 v = aa + adiag_i + 1; 1246 vi = aj + adiag_i + 1; 1247 nz = ai[i+1] - adiag_i - 1; 1248 sum = x[i]; 1249 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1250 x[i] = sum*aa[adiag_i]; 1251 } 1252 #endif 1253 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1254 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1255 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1262 { 1263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1264 IS iscol = a->col,isrow = a->row; 1265 PetscErrorCode ierr; 1266 PetscInt i, n = A->rmap->n,j; 1267 PetscInt nz; 1268 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1269 PetscScalar *x,*tmp,sum; 1270 const PetscScalar *b; 1271 const MatScalar *aa = a->a,*v; 1272 1273 PetscFunctionBegin; 1274 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1275 1276 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1277 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1278 tmp = a->solve_work; 1279 1280 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1281 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1282 1283 /* forward solve the lower triangular */ 1284 tmp[0] = b[*r++]; 1285 for (i=1; i<n; i++) { 1286 v = aa + ai[i] ; 1287 vi = aj + ai[i] ; 1288 nz = a->diag[i] - ai[i]; 1289 sum = b[*r++]; 1290 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1291 tmp[i] = sum; 1292 } 1293 1294 /* backward solve the upper triangular */ 1295 for (i=n-1; i>=0; i--){ 1296 v = aa + a->diag[i] + 1; 1297 vi = aj + a->diag[i] + 1; 1298 nz = ai[i+1] - a->diag[i] - 1; 1299 sum = tmp[i]; 1300 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1301 tmp[i] = sum*aa[a->diag[i]]; 1302 x[*c--] += tmp[i]; 1303 } 1304 1305 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1306 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1307 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1309 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1310 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1317 { 1318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1319 IS iscol = a->col,isrow = a->row; 1320 PetscErrorCode ierr; 1321 PetscInt i, n = A->rmap->n,j; 1322 PetscInt nz; 1323 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1324 PetscScalar *x,*tmp,sum; 1325 const PetscScalar *b; 1326 const MatScalar *aa = a->a,*v; 1327 1328 PetscFunctionBegin; 1329 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1330 1331 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1332 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1333 tmp = a->solve_work; 1334 1335 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1336 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1337 1338 /* forward solve the lower triangular */ 1339 tmp[0] = b[r[0]]; 1340 v = aa; 1341 vi = aj; 1342 for (i=1; i<n; i++) { 1343 nz = ai[i+1] - ai[i]; 1344 sum = b[r[i]]; 1345 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1346 tmp[i] = sum; 1347 v += nz; vi += nz; 1348 } 1349 1350 /* backward solve the upper triangular */ 1351 v = aa + adiag[n-1]; 1352 vi = aj + adiag[n-1]; 1353 for (i=n-1; i>=0; i--){ 1354 nz = adiag[i] - adiag[i+1] - 1; 1355 sum = tmp[i]; 1356 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1357 tmp[i] = sum*v[nz]; 1358 x[c[i]] += tmp[i]; 1359 v += nz+1; vi += nz+1; 1360 } 1361 1362 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1363 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1364 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1365 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1366 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1367 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1374 { 1375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1376 IS iscol = a->col,isrow = a->row; 1377 PetscErrorCode ierr; 1378 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1379 PetscInt i,n = A->rmap->n,j; 1380 PetscInt nz; 1381 PetscScalar *x,*tmp,s1; 1382 const MatScalar *aa = a->a,*v; 1383 const PetscScalar *b; 1384 1385 PetscFunctionBegin; 1386 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1388 tmp = a->solve_work; 1389 1390 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1391 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1392 1393 /* copy the b into temp work space according to permutation */ 1394 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1395 1396 /* forward solve the U^T */ 1397 for (i=0; i<n; i++) { 1398 v = aa + diag[i] ; 1399 vi = aj + diag[i] + 1; 1400 nz = ai[i+1] - diag[i] - 1; 1401 s1 = tmp[i]; 1402 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1403 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1404 tmp[i] = s1; 1405 } 1406 1407 /* backward solve the L^T */ 1408 for (i=n-1; i>=0; i--){ 1409 v = aa + diag[i] - 1 ; 1410 vi = aj + diag[i] - 1 ; 1411 nz = diag[i] - ai[i]; 1412 s1 = tmp[i]; 1413 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1414 } 1415 1416 /* copy tmp into x according to permutation */ 1417 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1418 1419 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1420 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1421 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1423 1424 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1431 { 1432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1433 IS iscol = a->col,isrow = a->row; 1434 PetscErrorCode ierr; 1435 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1436 PetscInt i,n = A->rmap->n,j; 1437 PetscInt nz; 1438 PetscScalar *x,*tmp,s1; 1439 const MatScalar *aa = a->a,*v; 1440 const PetscScalar *b; 1441 1442 PetscFunctionBegin; 1443 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1444 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1445 tmp = a->solve_work; 1446 1447 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1448 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1449 1450 /* copy the b into temp work space according to permutation */ 1451 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1452 1453 /* forward solve the U^T */ 1454 for (i=0; i<n; i++) { 1455 v = aa + adiag[i+1] + 1; 1456 vi = aj + adiag[i+1] + 1; 1457 nz = adiag[i] - adiag[i+1] - 1; 1458 s1 = tmp[i]; 1459 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1460 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1461 tmp[i] = s1; 1462 } 1463 1464 /* backward solve the L^T */ 1465 for (i=n-1; i>=0; i--){ 1466 v = aa + ai[i]; 1467 vi = aj + ai[i]; 1468 nz = ai[i+1] - ai[i]; 1469 s1 = tmp[i]; 1470 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1471 } 1472 1473 /* copy tmp into x according to permutation */ 1474 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1475 1476 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1477 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1478 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1479 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1480 1481 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1488 { 1489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1490 IS iscol = a->col,isrow = a->row; 1491 PetscErrorCode ierr; 1492 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1493 PetscInt i,n = A->rmap->n,j; 1494 PetscInt nz; 1495 PetscScalar *x,*tmp,s1; 1496 const MatScalar *aa = a->a,*v; 1497 const PetscScalar *b; 1498 1499 PetscFunctionBegin; 1500 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1501 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1502 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1503 tmp = a->solve_work; 1504 1505 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1506 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1507 1508 /* copy the b into temp work space according to permutation */ 1509 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1510 1511 /* forward solve the U^T */ 1512 for (i=0; i<n; i++) { 1513 v = aa + diag[i] ; 1514 vi = aj + diag[i] + 1; 1515 nz = ai[i+1] - diag[i] - 1; 1516 s1 = tmp[i]; 1517 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1518 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1519 tmp[i] = s1; 1520 } 1521 1522 /* backward solve the L^T */ 1523 for (i=n-1; i>=0; i--){ 1524 v = aa + diag[i] - 1 ; 1525 vi = aj + diag[i] - 1 ; 1526 nz = diag[i] - ai[i]; 1527 s1 = tmp[i]; 1528 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1529 } 1530 1531 /* copy tmp into x according to permutation */ 1532 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1533 1534 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1535 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 1539 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1546 { 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1548 IS iscol = a->col,isrow = a->row; 1549 PetscErrorCode ierr; 1550 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1551 PetscInt i,n = A->rmap->n,j; 1552 PetscInt nz; 1553 PetscScalar *x,*tmp,s1; 1554 const MatScalar *aa = a->a,*v; 1555 const PetscScalar *b; 1556 1557 PetscFunctionBegin; 1558 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1559 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1560 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1561 tmp = a->solve_work; 1562 1563 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1564 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1565 1566 /* copy the b into temp work space according to permutation */ 1567 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1568 1569 /* forward solve the U^T */ 1570 for (i=0; i<n; i++) { 1571 v = aa + adiag[i+1] + 1; 1572 vi = aj + adiag[i+1] + 1; 1573 nz = adiag[i] - adiag[i+1] - 1; 1574 s1 = tmp[i]; 1575 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1576 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1577 tmp[i] = s1; 1578 } 1579 1580 1581 /* backward solve the L^T */ 1582 for (i=n-1; i>=0; i--){ 1583 v = aa + ai[i] ; 1584 vi = aj + ai[i]; 1585 nz = ai[i+1] - ai[i]; 1586 s1 = tmp[i]; 1587 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1588 } 1589 1590 /* copy tmp into x according to permutation */ 1591 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1592 1593 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1594 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1595 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1596 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1597 1598 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* ----------------------------------------------------------------*/ 1603 1604 EXTERN PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscTruth); 1605 1606 /* 1607 ilu() under revised new data structure. 1608 Factored arrays bj and ba are stored as 1609 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1610 1611 bi=fact->i is an array of size n+1, in which 1612 bi+ 1613 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1614 bi[n]: points to L(n-1,n-1)+1 1615 1616 bdiag=fact->diag is an array of size n+1,in which 1617 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1618 bdiag[n]: points to entry of U(n-1,0)-1 1619 1620 U(i,:) contains bdiag[i] as its last entry, i.e., 1621 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1622 */ 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1626 { 1627 1628 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1629 PetscErrorCode ierr; 1630 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1631 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1632 PetscTruth missing; 1633 IS isicol; 1634 1635 PetscFunctionBegin; 1636 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1637 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1638 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1639 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1640 1641 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1642 b = (Mat_SeqAIJ*)(fact)->data; 1643 1644 /* allocate matrix arrays for new data structure */ 1645 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1646 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1647 b->singlemalloc = PETSC_TRUE; 1648 if (!b->diag){ 1649 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1650 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1651 } 1652 bdiag = b->diag; 1653 1654 if (n > 0) { 1655 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1656 } 1657 1658 /* set bi and bj with new data structure */ 1659 bi = b->i; 1660 bj = b->j; 1661 1662 /* L part */ 1663 bi[0] = 0; 1664 for (i=0; i<n; i++){ 1665 nz = adiag[i] - ai[i]; 1666 bi[i+1] = bi[i] + nz; 1667 aj = a->j + ai[i]; 1668 for (j=0; j<nz; j++){ 1669 /* *bj = aj[j]; bj++; */ 1670 bj[k++] = aj[j]; 1671 } 1672 } 1673 1674 /* U part */ 1675 bdiag[n] = bi[n]-1; 1676 for (i=n-1; i>=0; i--){ 1677 nz = ai[i+1] - adiag[i] - 1; 1678 aj = a->j + adiag[i] + 1; 1679 for (j=0; j<nz; j++){ 1680 /* *bj = aj[j]; bj++; */ 1681 bj[k++] = aj[j]; 1682 } 1683 /* diag[i] */ 1684 /* *bj = i; bj++; */ 1685 bj[k++] = i; 1686 bdiag[i] = bdiag[i+1] + nz + 1; 1687 } 1688 1689 fact->factortype = MAT_FACTOR_ILU; 1690 fact->info.factor_mallocs = 0; 1691 fact->info.fill_ratio_given = info->fill; 1692 fact->info.fill_ratio_needed = 1.0; 1693 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1694 1695 b = (Mat_SeqAIJ*)(fact)->data; 1696 b->row = isrow; 1697 b->col = iscol; 1698 b->icol = isicol; 1699 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1700 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1701 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1707 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1708 { 1709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1710 IS isicol; 1711 PetscErrorCode ierr; 1712 const PetscInt *r,*ic; 1713 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1714 PetscInt *bi,*cols,nnz,*cols_lvl; 1715 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1716 PetscInt i,levels,diagonal_fill; 1717 PetscTruth col_identity,row_identity; 1718 PetscReal f; 1719 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1720 PetscBT lnkbt; 1721 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1722 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1723 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1724 1725 PetscFunctionBegin; 1726 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1727 /* 1728 PetscTruth olddatastruct=PETSC_FALSE; 1729 ierr = PetscOptionsGetTruth(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1730 if(olddatastruct){ 1731 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 */ 1735 1736 levels = (PetscInt)info->levels; 1737 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1738 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1739 1740 if (!levels && row_identity && col_identity) { 1741 /* special case: ilu(0) with natural ordering */ 1742 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1743 if (a->inode.size) { 1744 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1750 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1751 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1752 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1753 1754 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1755 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1756 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1757 bi[0] = bdiag[0] = 0; 1758 1759 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1760 1761 /* create a linked list for storing column indices of the active row */ 1762 nlnk = n + 1; 1763 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1764 1765 /* initial FreeSpace size is f*(ai[n]+1) */ 1766 f = info->fill; 1767 diagonal_fill = (PetscInt)info->diagonal_fill; 1768 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1769 current_space = free_space; 1770 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1771 current_space_lvl = free_space_lvl; 1772 1773 for (i=0; i<n; i++) { 1774 nzi = 0; 1775 /* copy current row into linked list */ 1776 nnz = ai[r[i]+1] - ai[r[i]]; 1777 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1778 cols = aj + ai[r[i]]; 1779 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1780 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1781 nzi += nlnk; 1782 1783 /* make sure diagonal entry is included */ 1784 if (diagonal_fill && lnk[i] == -1) { 1785 fm = n; 1786 while (lnk[fm] < i) fm = lnk[fm]; 1787 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1788 lnk[fm] = i; 1789 lnk_lvl[i] = 0; 1790 nzi++; dcount++; 1791 } 1792 1793 /* add pivot rows into the active row */ 1794 nzbd = 0; 1795 prow = lnk[n]; 1796 while (prow < i) { 1797 nnz = bdiag[prow]; 1798 cols = bj_ptr[prow] + nnz + 1; 1799 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1801 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1802 nzi += nlnk; 1803 prow = lnk[prow]; 1804 nzbd++; 1805 } 1806 bdiag[i] = nzbd; 1807 bi[i+1] = bi[i] + nzi; 1808 1809 /* if free space is not available, make more free space */ 1810 if (current_space->local_remaining<nzi) { 1811 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1812 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1813 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1814 reallocs++; 1815 } 1816 1817 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1818 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1819 bj_ptr[i] = current_space->array; 1820 bjlvl_ptr[i] = current_space_lvl->array; 1821 1822 /* make sure the active row i has diagonal entry */ 1823 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1824 1825 current_space->array += nzi; 1826 current_space->local_used += nzi; 1827 current_space->local_remaining -= nzi; 1828 current_space_lvl->array += nzi; 1829 current_space_lvl->local_used += nzi; 1830 current_space_lvl->local_remaining -= nzi; 1831 } 1832 1833 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1834 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1835 1836 /* destroy list of free space and other temporary arrays */ 1837 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1838 1839 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1840 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1841 1842 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1843 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1844 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1845 1846 #if defined(PETSC_USE_INFO) 1847 { 1848 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1849 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1850 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1851 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1852 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1853 if (diagonal_fill) { 1854 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1855 } 1856 } 1857 #endif 1858 1859 /* put together the new matrix */ 1860 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1861 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1862 b = (Mat_SeqAIJ*)(fact)->data; 1863 b->free_a = PETSC_TRUE; 1864 b->free_ij = PETSC_TRUE; 1865 b->singlemalloc = PETSC_FALSE; 1866 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1867 b->j = bj; 1868 b->i = bi; 1869 b->diag = bdiag; 1870 b->ilen = 0; 1871 b->imax = 0; 1872 b->row = isrow; 1873 b->col = iscol; 1874 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1875 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1876 b->icol = isicol; 1877 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1878 /* In b structure: Free imax, ilen, old a, old j. 1879 Allocate bdiag, solve_work, new a, new j */ 1880 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1881 b->maxnz = b->nz = bdiag[0]+1; 1882 (fact)->info.factor_mallocs = reallocs; 1883 (fact)->info.fill_ratio_given = f; 1884 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1885 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1886 if (a->inode.size) { 1887 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 #undef __FUNCT__ 1893 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1894 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1895 { 1896 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1897 IS isicol; 1898 PetscErrorCode ierr; 1899 const PetscInt *r,*ic; 1900 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1901 PetscInt *bi,*cols,nnz,*cols_lvl; 1902 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1903 PetscInt i,levels,diagonal_fill; 1904 PetscTruth col_identity,row_identity; 1905 PetscReal f; 1906 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1907 PetscBT lnkbt; 1908 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1909 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1910 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1911 PetscTruth missing; 1912 1913 PetscFunctionBegin; 1914 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1915 f = info->fill; 1916 levels = (PetscInt)info->levels; 1917 diagonal_fill = (PetscInt)info->diagonal_fill; 1918 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1919 1920 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1921 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1922 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1923 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1924 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1925 if (a->inode.size) { 1926 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1927 } 1928 fact->factortype = MAT_FACTOR_ILU; 1929 (fact)->info.factor_mallocs = 0; 1930 (fact)->info.fill_ratio_given = info->fill; 1931 (fact)->info.fill_ratio_needed = 1.0; 1932 b = (Mat_SeqAIJ*)(fact)->data; 1933 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1934 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1935 b->row = isrow; 1936 b->col = iscol; 1937 b->icol = isicol; 1938 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1939 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1940 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1941 PetscFunctionReturn(0); 1942 } 1943 1944 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1945 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1946 1947 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1948 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1949 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1950 bi[0] = bdiag[0] = 0; 1951 1952 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1953 1954 /* create a linked list for storing column indices of the active row */ 1955 nlnk = n + 1; 1956 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1957 1958 /* initial FreeSpace size is f*(ai[n]+1) */ 1959 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1960 current_space = free_space; 1961 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1962 current_space_lvl = free_space_lvl; 1963 1964 for (i=0; i<n; i++) { 1965 nzi = 0; 1966 /* copy current row into linked list */ 1967 nnz = ai[r[i]+1] - ai[r[i]]; 1968 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1969 cols = aj + ai[r[i]]; 1970 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1971 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1972 nzi += nlnk; 1973 1974 /* make sure diagonal entry is included */ 1975 if (diagonal_fill && lnk[i] == -1) { 1976 fm = n; 1977 while (lnk[fm] < i) fm = lnk[fm]; 1978 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1979 lnk[fm] = i; 1980 lnk_lvl[i] = 0; 1981 nzi++; dcount++; 1982 } 1983 1984 /* add pivot rows into the active row */ 1985 nzbd = 0; 1986 prow = lnk[n]; 1987 while (prow < i) { 1988 nnz = bdiag[prow]; 1989 cols = bj_ptr[prow] + nnz + 1; 1990 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1991 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1992 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1993 nzi += nlnk; 1994 prow = lnk[prow]; 1995 nzbd++; 1996 } 1997 bdiag[i] = nzbd; 1998 bi[i+1] = bi[i] + nzi; 1999 2000 /* if free space is not available, make more free space */ 2001 if (current_space->local_remaining<nzi) { 2002 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2003 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2004 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2005 reallocs++; 2006 } 2007 2008 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2009 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2010 bj_ptr[i] = current_space->array; 2011 bjlvl_ptr[i] = current_space_lvl->array; 2012 2013 /* make sure the active row i has diagonal entry */ 2014 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2015 2016 current_space->array += nzi; 2017 current_space->local_used += nzi; 2018 current_space->local_remaining -= nzi; 2019 current_space_lvl->array += nzi; 2020 current_space_lvl->local_used += nzi; 2021 current_space_lvl->local_remaining -= nzi; 2022 } 2023 2024 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2025 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2026 2027 /* destroy list of free space and other temporary arrays */ 2028 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2029 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2030 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2031 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2032 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2033 2034 #if defined(PETSC_USE_INFO) 2035 { 2036 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2037 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2038 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2039 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2040 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2041 if (diagonal_fill) { 2042 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2043 } 2044 } 2045 #endif 2046 2047 /* put together the new matrix */ 2048 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2049 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2050 b = (Mat_SeqAIJ*)(fact)->data; 2051 b->free_a = PETSC_TRUE; 2052 b->free_ij = PETSC_TRUE; 2053 b->singlemalloc = PETSC_FALSE; 2054 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2055 b->j = bj; 2056 b->i = bi; 2057 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2058 b->diag = bdiag; 2059 b->ilen = 0; 2060 b->imax = 0; 2061 b->row = isrow; 2062 b->col = iscol; 2063 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2064 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2065 b->icol = isicol; 2066 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2067 /* In b structure: Free imax, ilen, old a, old j. 2068 Allocate bdiag, solve_work, new a, new j */ 2069 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2070 b->maxnz = b->nz = bi[n] ; 2071 (fact)->info.factor_mallocs = reallocs; 2072 (fact)->info.fill_ratio_given = f; 2073 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2074 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2075 if (a->inode.size) { 2076 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2077 } 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #undef __FUNCT__ 2082 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2083 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2084 { 2085 Mat C = B; 2086 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2087 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2088 IS ip=b->row,iip = b->icol; 2089 PetscErrorCode ierr; 2090 const PetscInt *rip,*riip; 2091 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2092 PetscInt *ai=a->i,*aj=a->j; 2093 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2094 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2095 PetscTruth perm_identity; 2096 FactorShiftCtx sctx; 2097 PetscReal rs; 2098 MatScalar d,*v; 2099 2100 PetscFunctionBegin; 2101 /* MatPivotSetUp(): initialize shift context sctx */ 2102 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2103 2104 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2105 sctx.shift_top = info->zeropivot; 2106 for (i=0; i<mbs; i++) { 2107 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2108 d = (aa)[a->diag[i]]; 2109 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2110 v = aa+ai[i]; 2111 nz = ai[i+1] - ai[i]; 2112 for (j=0; j<nz; j++) 2113 rs += PetscAbsScalar(v[j]); 2114 if (rs>sctx.shift_top) sctx.shift_top = rs; 2115 } 2116 sctx.shift_top *= 1.1; 2117 sctx.nshift_max = 5; 2118 sctx.shift_lo = 0.; 2119 sctx.shift_hi = 1.; 2120 } 2121 2122 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2123 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2124 2125 /* allocate working arrays 2126 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2127 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2128 */ 2129 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2130 2131 do { 2132 sctx.newshift = PETSC_FALSE; 2133 2134 for (i=0; i<mbs; i++) c2r[i] = mbs; 2135 il[0] = 0; 2136 2137 for (k = 0; k<mbs; k++){ 2138 /* zero rtmp */ 2139 nz = bi[k+1] - bi[k]; 2140 bjtmp = bj + bi[k]; 2141 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2142 2143 /* load in initial unfactored row */ 2144 bval = ba + bi[k]; 2145 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2146 for (j = jmin; j < jmax; j++){ 2147 col = riip[aj[j]]; 2148 if (col >= k){ /* only take upper triangular entry */ 2149 rtmp[col] = aa[j]; 2150 *bval++ = 0.0; /* for in-place factorization */ 2151 } 2152 } 2153 /* shift the diagonal of the matrix: ZeropivotApply() */ 2154 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2155 2156 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2157 dk = rtmp[k]; 2158 i = c2r[k]; /* first row to be added to k_th row */ 2159 2160 while (i < k){ 2161 nexti = c2r[i]; /* next row to be added to k_th row */ 2162 2163 /* compute multiplier, update diag(k) and U(i,k) */ 2164 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2165 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2166 dk += uikdi*ba[ili]; /* update diag[k] */ 2167 ba[ili] = uikdi; /* -U(i,k) */ 2168 2169 /* add multiple of row i to k-th row */ 2170 jmin = ili + 1; jmax = bi[i+1]; 2171 if (jmin < jmax){ 2172 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2173 /* update il and c2r for row i */ 2174 il[i] = jmin; 2175 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2176 } 2177 i = nexti; 2178 } 2179 2180 /* copy data into U(k,:) */ 2181 rs = 0.0; 2182 jmin = bi[k]; jmax = bi[k+1]-1; 2183 if (jmin < jmax) { 2184 for (j=jmin; j<jmax; j++){ 2185 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2186 } 2187 /* add the k-th row into il and c2r */ 2188 il[k] = jmin; 2189 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2190 } 2191 2192 /* MatPivotCheck() */ 2193 sctx.rs = rs; 2194 sctx.pv = dk; 2195 ierr = MatPivotCheck(info,&sctx,i);CHKERRQ(ierr); 2196 if(sctx.newshift) break; 2197 dk = sctx.pv; 2198 2199 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2200 } 2201 } while (sctx.newshift); 2202 2203 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2204 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2205 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2206 2207 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2208 if (perm_identity){ 2209 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2210 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2211 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2212 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2213 } else { 2214 B->ops->solve = MatSolve_SeqSBAIJ_1; 2215 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2216 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2217 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2218 } 2219 2220 C->assembled = PETSC_TRUE; 2221 C->preallocated = PETSC_TRUE; 2222 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2223 2224 /* MatPivotView() */ 2225 if (sctx.nshift){ 2226 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2227 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2228 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2229 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2230 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2231 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2232 } 2233 } 2234 PetscFunctionReturn(0); 2235 } 2236 2237 #undef __FUNCT__ 2238 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2239 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2240 { 2241 Mat C = B; 2242 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2243 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2244 IS ip=b->row,iip = b->icol; 2245 PetscErrorCode ierr; 2246 const PetscInt *rip,*riip; 2247 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2248 PetscInt *ai=a->i,*aj=a->j; 2249 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2250 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2251 PetscTruth perm_identity; 2252 FactorShiftCtx sctx; 2253 PetscReal rs; 2254 MatScalar d,*v; 2255 2256 PetscFunctionBegin; 2257 /* MatPivotSetUp(): initialize shift context sctx */ 2258 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2259 2260 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2261 sctx.shift_top = info->zeropivot; 2262 for (i=0; i<mbs; i++) { 2263 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2264 d = (aa)[a->diag[i]]; 2265 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2266 v = aa+ai[i]; 2267 nz = ai[i+1] - ai[i]; 2268 for (j=0; j<nz; j++) 2269 rs += PetscAbsScalar(v[j]); 2270 if (rs>sctx.shift_top) sctx.shift_top = rs; 2271 } 2272 sctx.shift_top *= 1.1; 2273 sctx.nshift_max = 5; 2274 sctx.shift_lo = 0.; 2275 sctx.shift_hi = 1.; 2276 } 2277 2278 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2279 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2280 2281 /* initialization */ 2282 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2283 2284 do { 2285 sctx.newshift = PETSC_FALSE; 2286 2287 for (i=0; i<mbs; i++) jl[i] = mbs; 2288 il[0] = 0; 2289 2290 for (k = 0; k<mbs; k++){ 2291 /* zero rtmp */ 2292 nz = bi[k+1] - bi[k]; 2293 bjtmp = bj + bi[k]; 2294 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2295 2296 bval = ba + bi[k]; 2297 /* initialize k-th row by the perm[k]-th row of A */ 2298 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2299 for (j = jmin; j < jmax; j++){ 2300 col = riip[aj[j]]; 2301 if (col >= k){ /* only take upper triangular entry */ 2302 rtmp[col] = aa[j]; 2303 *bval++ = 0.0; /* for in-place factorization */ 2304 } 2305 } 2306 /* shift the diagonal of the matrix */ 2307 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2308 2309 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2310 dk = rtmp[k]; 2311 i = jl[k]; /* first row to be added to k_th row */ 2312 2313 while (i < k){ 2314 nexti = jl[i]; /* next row to be added to k_th row */ 2315 2316 /* compute multiplier, update diag(k) and U(i,k) */ 2317 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2318 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2319 dk += uikdi*ba[ili]; 2320 ba[ili] = uikdi; /* -U(i,k) */ 2321 2322 /* add multiple of row i to k-th row */ 2323 jmin = ili + 1; jmax = bi[i+1]; 2324 if (jmin < jmax){ 2325 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2326 /* update il and jl for row i */ 2327 il[i] = jmin; 2328 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2329 } 2330 i = nexti; 2331 } 2332 2333 /* shift the diagonals when zero pivot is detected */ 2334 /* compute rs=sum of abs(off-diagonal) */ 2335 rs = 0.0; 2336 jmin = bi[k]+1; 2337 nz = bi[k+1] - jmin; 2338 bcol = bj + jmin; 2339 for (j=0; j<nz; j++) { 2340 rs += PetscAbsScalar(rtmp[bcol[j]]); 2341 } 2342 2343 sctx.rs = rs; 2344 sctx.pv = dk; 2345 ierr = MatPivotCheck(info,&sctx,k);CHKERRQ(ierr); 2346 if (sctx.newshift) break; 2347 dk = sctx.pv; 2348 2349 /* copy data into U(k,:) */ 2350 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2351 jmin = bi[k]+1; jmax = bi[k+1]; 2352 if (jmin < jmax) { 2353 for (j=jmin; j<jmax; j++){ 2354 col = bj[j]; ba[j] = rtmp[col]; 2355 } 2356 /* add the k-th row into il and jl */ 2357 il[k] = jmin; 2358 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2359 } 2360 } 2361 } while (sctx.newshift); 2362 2363 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2364 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2365 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2366 2367 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2368 if (perm_identity){ 2369 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2370 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2371 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2372 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2373 } else { 2374 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2375 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2376 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2377 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2378 } 2379 2380 C->assembled = PETSC_TRUE; 2381 C->preallocated = PETSC_TRUE; 2382 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2383 if (sctx.nshift){ 2384 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2385 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2386 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2387 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2388 } 2389 } 2390 PetscFunctionReturn(0); 2391 } 2392 2393 /* 2394 icc() under revised new data structure. 2395 Factored arrays bj and ba are stored as 2396 U(0,:),...,U(i,:),U(n-1,:) 2397 2398 ui=fact->i is an array of size n+1, in which 2399 ui+ 2400 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2401 ui[n]: points to U(n-1,n-1)+1 2402 2403 udiag=fact->diag is an array of size n,in which 2404 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2405 2406 U(i,:) contains udiag[i] as its last entry, i.e., 2407 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2408 */ 2409 2410 #undef __FUNCT__ 2411 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2412 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2413 { 2414 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2415 Mat_SeqSBAIJ *b; 2416 PetscErrorCode ierr; 2417 PetscTruth perm_identity,missing; 2418 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2419 const PetscInt *rip,*riip; 2420 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2421 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2422 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2423 PetscReal fill=info->fill,levels=info->levels; 2424 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2425 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2426 PetscBT lnkbt; 2427 IS iperm; 2428 2429 PetscFunctionBegin; 2430 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2431 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2432 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2433 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2434 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2435 2436 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2437 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2438 ui[0] = 0; 2439 2440 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2441 if (!levels && perm_identity) { 2442 for (i=0; i<am; i++) { 2443 ncols = ai[i+1] - a->diag[i]; 2444 ui[i+1] = ui[i] + ncols; 2445 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2446 } 2447 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2448 cols = uj; 2449 for (i=0; i<am; i++) { 2450 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2451 ncols = ai[i+1] - a->diag[i] -1; 2452 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2453 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2454 } 2455 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2456 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2457 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2458 2459 /* initialization */ 2460 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2461 2462 /* jl: linked list for storing indices of the pivot rows 2463 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2464 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2465 for (i=0; i<am; i++){ 2466 jl[i] = am; il[i] = 0; 2467 } 2468 2469 /* create and initialize a linked list for storing column indices of the active row k */ 2470 nlnk = am + 1; 2471 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2472 2473 /* initial FreeSpace size is fill*(ai[am]+1) */ 2474 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2475 current_space = free_space; 2476 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2477 current_space_lvl = free_space_lvl; 2478 2479 for (k=0; k<am; k++){ /* for each active row k */ 2480 /* initialize lnk by the column indices of row rip[k] of A */ 2481 nzk = 0; 2482 ncols = ai[rip[k]+1] - ai[rip[k]]; 2483 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2484 ncols_upper = 0; 2485 for (j=0; j<ncols; j++){ 2486 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2487 if (riip[i] >= k){ /* only take upper triangular entry */ 2488 ajtmp[ncols_upper] = i; 2489 ncols_upper++; 2490 } 2491 } 2492 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2493 nzk += nlnk; 2494 2495 /* update lnk by computing fill-in for each pivot row to be merged in */ 2496 prow = jl[k]; /* 1st pivot row */ 2497 2498 while (prow < k){ 2499 nextprow = jl[prow]; 2500 2501 /* merge prow into k-th row */ 2502 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2503 jmax = ui[prow+1]; 2504 ncols = jmax-jmin; 2505 i = jmin - ui[prow]; 2506 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2507 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2508 j = *(uj - 1); 2509 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2510 nzk += nlnk; 2511 2512 /* update il and jl for prow */ 2513 if (jmin < jmax){ 2514 il[prow] = jmin; 2515 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2516 } 2517 prow = nextprow; 2518 } 2519 2520 /* if free space is not available, make more free space */ 2521 if (current_space->local_remaining<nzk) { 2522 i = am - k + 1; /* num of unfactored rows */ 2523 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2524 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2525 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2526 reallocs++; 2527 } 2528 2529 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2530 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2531 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2532 2533 /* add the k-th row into il and jl */ 2534 if (nzk > 1){ 2535 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2536 jl[k] = jl[i]; jl[i] = k; 2537 il[k] = ui[k] + 1; 2538 } 2539 uj_ptr[k] = current_space->array; 2540 uj_lvl_ptr[k] = current_space_lvl->array; 2541 2542 current_space->array += nzk; 2543 current_space->local_used += nzk; 2544 current_space->local_remaining -= nzk; 2545 2546 current_space_lvl->array += nzk; 2547 current_space_lvl->local_used += nzk; 2548 current_space_lvl->local_remaining -= nzk; 2549 2550 ui[k+1] = ui[k] + nzk; 2551 } 2552 2553 #if defined(PETSC_USE_INFO) 2554 if (ai[am] != 0) { 2555 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2556 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2557 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2558 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2559 } else { 2560 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2561 } 2562 #endif 2563 2564 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2565 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2566 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2567 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2568 2569 /* destroy list of free space and other temporary array(s) */ 2570 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2571 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2572 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2573 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2574 2575 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2576 2577 /* put together the new matrix in MATSEQSBAIJ format */ 2578 b = (Mat_SeqSBAIJ*)(fact)->data; 2579 b->singlemalloc = PETSC_FALSE; 2580 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2581 b->j = uj; 2582 b->i = ui; 2583 b->diag = udiag; 2584 b->free_diag = PETSC_TRUE; 2585 b->ilen = 0; 2586 b->imax = 0; 2587 b->row = perm; 2588 b->col = perm; 2589 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2590 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2591 b->icol = iperm; 2592 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2593 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2594 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2595 b->maxnz = b->nz = ui[am]; 2596 b->free_a = PETSC_TRUE; 2597 b->free_ij = PETSC_TRUE; 2598 2599 fact->info.factor_mallocs = reallocs; 2600 fact->info.fill_ratio_given = fill; 2601 if (ai[am] != 0) { 2602 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2603 } else { 2604 fact->info.fill_ratio_needed = 0.0; 2605 } 2606 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 #undef __FUNCT__ 2611 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2612 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2613 { 2614 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2615 Mat_SeqSBAIJ *b; 2616 PetscErrorCode ierr; 2617 PetscTruth perm_identity,missing; 2618 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2619 const PetscInt *rip,*riip; 2620 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2621 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2622 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2623 PetscReal fill=info->fill,levels=info->levels; 2624 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2625 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2626 PetscBT lnkbt; 2627 IS iperm; 2628 2629 PetscFunctionBegin; 2630 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2631 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2632 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2633 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2634 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2635 2636 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2637 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2638 ui[0] = 0; 2639 2640 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2641 if (!levels && perm_identity) { 2642 2643 for (i=0; i<am; i++) { 2644 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2645 udiag[i] = ui[i]; 2646 } 2647 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2648 cols = uj; 2649 for (i=0; i<am; i++) { 2650 aj = a->j + a->diag[i]; 2651 ncols = ui[i+1] - ui[i]; 2652 for (j=0; j<ncols; j++) *cols++ = *aj++; 2653 } 2654 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2655 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2656 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2657 2658 /* initialization */ 2659 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2660 2661 /* jl: linked list for storing indices of the pivot rows 2662 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2663 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2664 for (i=0; i<am; i++){ 2665 jl[i] = am; il[i] = 0; 2666 } 2667 2668 /* create and initialize a linked list for storing column indices of the active row k */ 2669 nlnk = am + 1; 2670 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2671 2672 /* initial FreeSpace size is fill*(ai[am]+1) */ 2673 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2674 current_space = free_space; 2675 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2676 current_space_lvl = free_space_lvl; 2677 2678 for (k=0; k<am; k++){ /* for each active row k */ 2679 /* initialize lnk by the column indices of row rip[k] of A */ 2680 nzk = 0; 2681 ncols = ai[rip[k]+1] - ai[rip[k]]; 2682 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2683 ncols_upper = 0; 2684 for (j=0; j<ncols; j++){ 2685 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2686 if (riip[i] >= k){ /* only take upper triangular entry */ 2687 ajtmp[ncols_upper] = i; 2688 ncols_upper++; 2689 } 2690 } 2691 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2692 nzk += nlnk; 2693 2694 /* update lnk by computing fill-in for each pivot row to be merged in */ 2695 prow = jl[k]; /* 1st pivot row */ 2696 2697 while (prow < k){ 2698 nextprow = jl[prow]; 2699 2700 /* merge prow into k-th row */ 2701 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2702 jmax = ui[prow+1]; 2703 ncols = jmax-jmin; 2704 i = jmin - ui[prow]; 2705 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2706 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2707 j = *(uj - 1); 2708 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2709 nzk += nlnk; 2710 2711 /* update il and jl for prow */ 2712 if (jmin < jmax){ 2713 il[prow] = jmin; 2714 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2715 } 2716 prow = nextprow; 2717 } 2718 2719 /* if free space is not available, make more free space */ 2720 if (current_space->local_remaining<nzk) { 2721 i = am - k + 1; /* num of unfactored rows */ 2722 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2723 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2724 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2725 reallocs++; 2726 } 2727 2728 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2729 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2730 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2731 2732 /* add the k-th row into il and jl */ 2733 if (nzk > 1){ 2734 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2735 jl[k] = jl[i]; jl[i] = k; 2736 il[k] = ui[k] + 1; 2737 } 2738 uj_ptr[k] = current_space->array; 2739 uj_lvl_ptr[k] = current_space_lvl->array; 2740 2741 current_space->array += nzk; 2742 current_space->local_used += nzk; 2743 current_space->local_remaining -= nzk; 2744 2745 current_space_lvl->array += nzk; 2746 current_space_lvl->local_used += nzk; 2747 current_space_lvl->local_remaining -= nzk; 2748 2749 ui[k+1] = ui[k] + nzk; 2750 } 2751 2752 #if defined(PETSC_USE_INFO) 2753 if (ai[am] != 0) { 2754 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2755 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2756 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2757 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2758 } else { 2759 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2760 } 2761 #endif 2762 2763 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2764 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2765 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2766 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2767 2768 /* destroy list of free space and other temporary array(s) */ 2769 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2770 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2771 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2772 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2773 2774 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2775 2776 /* put together the new matrix in MATSEQSBAIJ format */ 2777 2778 b = (Mat_SeqSBAIJ*)fact->data; 2779 b->singlemalloc = PETSC_FALSE; 2780 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2781 b->j = uj; 2782 b->i = ui; 2783 b->diag = udiag; 2784 b->free_diag = PETSC_TRUE; 2785 b->ilen = 0; 2786 b->imax = 0; 2787 b->row = perm; 2788 b->col = perm; 2789 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2790 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2791 b->icol = iperm; 2792 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2793 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2794 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2795 b->maxnz = b->nz = ui[am]; 2796 b->free_a = PETSC_TRUE; 2797 b->free_ij = PETSC_TRUE; 2798 2799 fact->info.factor_mallocs = reallocs; 2800 fact->info.fill_ratio_given = fill; 2801 if (ai[am] != 0) { 2802 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2803 } else { 2804 fact->info.fill_ratio_needed = 0.0; 2805 } 2806 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2811 { 2812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2813 Mat_SeqSBAIJ *b; 2814 PetscErrorCode ierr; 2815 PetscTruth perm_identity; 2816 PetscReal fill = info->fill; 2817 const PetscInt *rip,*riip; 2818 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2819 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2820 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2821 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2822 PetscBT lnkbt; 2823 IS iperm; 2824 2825 PetscFunctionBegin; 2826 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2827 /* check whether perm is the identity mapping */ 2828 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2829 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2830 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2831 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2832 2833 /* initialization */ 2834 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2835 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2836 ui[0] = 0; 2837 2838 /* jl: linked list for storing indices of the pivot rows 2839 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2840 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2841 for (i=0; i<am; i++){ 2842 jl[i] = am; il[i] = 0; 2843 } 2844 2845 /* create and initialize a linked list for storing column indices of the active row k */ 2846 nlnk = am + 1; 2847 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2848 2849 /* initial FreeSpace size is fill*(ai[am]+1) */ 2850 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2851 current_space = free_space; 2852 2853 for (k=0; k<am; k++){ /* for each active row k */ 2854 /* initialize lnk by the column indices of row rip[k] of A */ 2855 nzk = 0; 2856 ncols = ai[rip[k]+1] - ai[rip[k]]; 2857 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2858 ncols_upper = 0; 2859 for (j=0; j<ncols; j++){ 2860 i = riip[*(aj + ai[rip[k]] + j)]; 2861 if (i >= k){ /* only take upper triangular entry */ 2862 cols[ncols_upper] = i; 2863 ncols_upper++; 2864 } 2865 } 2866 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2867 nzk += nlnk; 2868 2869 /* update lnk by computing fill-in for each pivot row to be merged in */ 2870 prow = jl[k]; /* 1st pivot row */ 2871 2872 while (prow < k){ 2873 nextprow = jl[prow]; 2874 /* merge prow into k-th row */ 2875 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2876 jmax = ui[prow+1]; 2877 ncols = jmax-jmin; 2878 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2879 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2880 nzk += nlnk; 2881 2882 /* update il and jl for prow */ 2883 if (jmin < jmax){ 2884 il[prow] = jmin; 2885 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2886 } 2887 prow = nextprow; 2888 } 2889 2890 /* if free space is not available, make more free space */ 2891 if (current_space->local_remaining<nzk) { 2892 i = am - k + 1; /* num of unfactored rows */ 2893 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2894 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2895 reallocs++; 2896 } 2897 2898 /* copy data into free space, then initialize lnk */ 2899 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2900 2901 /* add the k-th row into il and jl */ 2902 if (nzk > 1){ 2903 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2904 jl[k] = jl[i]; jl[i] = k; 2905 il[k] = ui[k] + 1; 2906 } 2907 ui_ptr[k] = current_space->array; 2908 current_space->array += nzk; 2909 current_space->local_used += nzk; 2910 current_space->local_remaining -= nzk; 2911 2912 ui[k+1] = ui[k] + nzk; 2913 } 2914 2915 #if defined(PETSC_USE_INFO) 2916 if (ai[am] != 0) { 2917 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 2918 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2919 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2920 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2921 } else { 2922 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2923 } 2924 #endif 2925 2926 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2927 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2928 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2929 2930 /* destroy list of free space and other temporary array(s) */ 2931 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2932 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2933 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2934 2935 /* put together the new matrix in MATSEQSBAIJ format */ 2936 2937 b = (Mat_SeqSBAIJ*)fact->data; 2938 b->singlemalloc = PETSC_FALSE; 2939 b->free_a = PETSC_TRUE; 2940 b->free_ij = PETSC_TRUE; 2941 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2942 b->j = uj; 2943 b->i = ui; 2944 b->diag = udiag; 2945 b->free_diag = PETSC_TRUE; 2946 b->ilen = 0; 2947 b->imax = 0; 2948 b->row = perm; 2949 b->col = perm; 2950 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2951 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2952 b->icol = iperm; 2953 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2954 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2955 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2956 b->maxnz = b->nz = ui[am]; 2957 2958 fact->info.factor_mallocs = reallocs; 2959 fact->info.fill_ratio_given = fill; 2960 if (ai[am] != 0) { 2961 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2962 } else { 2963 fact->info.fill_ratio_needed = 0.0; 2964 } 2965 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2971 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2972 { 2973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2974 Mat_SeqSBAIJ *b; 2975 PetscErrorCode ierr; 2976 PetscTruth perm_identity; 2977 PetscReal fill = info->fill; 2978 const PetscInt *rip,*riip; 2979 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2980 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2981 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2982 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2983 PetscBT lnkbt; 2984 IS iperm; 2985 2986 PetscFunctionBegin; 2987 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2988 /* check whether perm is the identity mapping */ 2989 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2990 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2991 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2992 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2993 2994 /* initialization */ 2995 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2996 ui[0] = 0; 2997 2998 /* jl: linked list for storing indices of the pivot rows 2999 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3000 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3001 for (i=0; i<am; i++){ 3002 jl[i] = am; il[i] = 0; 3003 } 3004 3005 /* create and initialize a linked list for storing column indices of the active row k */ 3006 nlnk = am + 1; 3007 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3008 3009 /* initial FreeSpace size is fill*(ai[am]+1) */ 3010 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3011 current_space = free_space; 3012 3013 for (k=0; k<am; k++){ /* for each active row k */ 3014 /* initialize lnk by the column indices of row rip[k] of A */ 3015 nzk = 0; 3016 ncols = ai[rip[k]+1] - ai[rip[k]]; 3017 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3018 ncols_upper = 0; 3019 for (j=0; j<ncols; j++){ 3020 i = riip[*(aj + ai[rip[k]] + j)]; 3021 if (i >= k){ /* only take upper triangular entry */ 3022 cols[ncols_upper] = i; 3023 ncols_upper++; 3024 } 3025 } 3026 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3027 nzk += nlnk; 3028 3029 /* update lnk by computing fill-in for each pivot row to be merged in */ 3030 prow = jl[k]; /* 1st pivot row */ 3031 3032 while (prow < k){ 3033 nextprow = jl[prow]; 3034 /* merge prow into k-th row */ 3035 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3036 jmax = ui[prow+1]; 3037 ncols = jmax-jmin; 3038 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3039 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3040 nzk += nlnk; 3041 3042 /* update il and jl for prow */ 3043 if (jmin < jmax){ 3044 il[prow] = jmin; 3045 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3046 } 3047 prow = nextprow; 3048 } 3049 3050 /* if free space is not available, make more free space */ 3051 if (current_space->local_remaining<nzk) { 3052 i = am - k + 1; /* num of unfactored rows */ 3053 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3054 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3055 reallocs++; 3056 } 3057 3058 /* copy data into free space, then initialize lnk */ 3059 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3060 3061 /* add the k-th row into il and jl */ 3062 if (nzk-1 > 0){ 3063 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3064 jl[k] = jl[i]; jl[i] = k; 3065 il[k] = ui[k] + 1; 3066 } 3067 ui_ptr[k] = current_space->array; 3068 current_space->array += nzk; 3069 current_space->local_used += nzk; 3070 current_space->local_remaining -= nzk; 3071 3072 ui[k+1] = ui[k] + nzk; 3073 } 3074 3075 #if defined(PETSC_USE_INFO) 3076 if (ai[am] != 0) { 3077 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3078 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3079 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3080 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3081 } else { 3082 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3083 } 3084 #endif 3085 3086 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3087 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3088 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3089 3090 /* destroy list of free space and other temporary array(s) */ 3091 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3092 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3093 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3094 3095 /* put together the new matrix in MATSEQSBAIJ format */ 3096 3097 b = (Mat_SeqSBAIJ*)fact->data; 3098 b->singlemalloc = PETSC_FALSE; 3099 b->free_a = PETSC_TRUE; 3100 b->free_ij = PETSC_TRUE; 3101 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3102 b->j = uj; 3103 b->i = ui; 3104 b->diag = 0; 3105 b->ilen = 0; 3106 b->imax = 0; 3107 b->row = perm; 3108 b->col = perm; 3109 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3110 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3111 b->icol = iperm; 3112 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3113 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3114 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3115 b->maxnz = b->nz = ui[am]; 3116 3117 fact->info.factor_mallocs = reallocs; 3118 fact->info.fill_ratio_given = fill; 3119 if (ai[am] != 0) { 3120 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3121 } else { 3122 fact->info.fill_ratio_needed = 0.0; 3123 } 3124 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3125 PetscFunctionReturn(0); 3126 } 3127 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3130 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3131 { 3132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3133 PetscErrorCode ierr; 3134 PetscInt n = A->rmap->n; 3135 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3136 PetscScalar *x,sum; 3137 const PetscScalar *b; 3138 const MatScalar *aa = a->a,*v; 3139 PetscInt i,nz; 3140 3141 PetscFunctionBegin; 3142 if (!n) PetscFunctionReturn(0); 3143 3144 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3146 3147 /* forward solve the lower triangular */ 3148 x[0] = b[0]; 3149 v = aa; 3150 vi = aj; 3151 for (i=1; i<n; i++) { 3152 nz = ai[i+1] - ai[i]; 3153 sum = b[i]; 3154 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3155 v += nz; 3156 vi += nz; 3157 x[i] = sum; 3158 } 3159 3160 /* backward solve the upper triangular */ 3161 for (i=n-1; i>=0; i--){ 3162 v = aa + adiag[i+1] + 1; 3163 vi = aj + adiag[i+1] + 1; 3164 nz = adiag[i] - adiag[i+1]-1; 3165 sum = x[i]; 3166 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3167 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3168 } 3169 3170 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3171 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatSolve_SeqAIJ" 3178 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3179 { 3180 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3181 IS iscol = a->col,isrow = a->row; 3182 PetscErrorCode ierr; 3183 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3184 const PetscInt *rout,*cout,*r,*c; 3185 PetscScalar *x,*tmp,sum; 3186 const PetscScalar *b; 3187 const MatScalar *aa = a->a,*v; 3188 3189 PetscFunctionBegin; 3190 if (!n) PetscFunctionReturn(0); 3191 3192 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3193 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3194 tmp = a->solve_work; 3195 3196 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3197 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3198 3199 /* forward solve the lower triangular */ 3200 tmp[0] = b[r[0]]; 3201 v = aa; 3202 vi = aj; 3203 for (i=1; i<n; i++) { 3204 nz = ai[i+1] - ai[i]; 3205 sum = b[r[i]]; 3206 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3207 tmp[i] = sum; 3208 v += nz; vi += nz; 3209 } 3210 3211 /* backward solve the upper triangular */ 3212 for (i=n-1; i>=0; i--){ 3213 v = aa + adiag[i+1]+1; 3214 vi = aj + adiag[i+1]+1; 3215 nz = adiag[i]-adiag[i+1]-1; 3216 sum = tmp[i]; 3217 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3218 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3219 } 3220 3221 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3222 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3223 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3224 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3225 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3231 /* 3232 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3233 */ 3234 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3235 { 3236 Mat B = *fact; 3237 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3238 IS isicol; 3239 PetscErrorCode ierr; 3240 const PetscInt *r,*ic; 3241 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3242 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3243 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3244 PetscInt nlnk,*lnk; 3245 PetscBT lnkbt; 3246 PetscTruth row_identity,icol_identity; 3247 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3248 const PetscInt *ics; 3249 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3250 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3251 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3252 PetscTruth missing; 3253 3254 PetscFunctionBegin; 3255 3256 if (dt == PETSC_DEFAULT) dt = 0.005; 3257 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3258 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3259 3260 /* ------- symbolic factorization, can be reused ---------*/ 3261 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3262 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3263 adiag=a->diag; 3264 3265 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3266 3267 /* bdiag is location of diagonal in factor */ 3268 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3269 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3270 3271 /* allocate row pointers bi */ 3272 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3273 3274 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3275 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3276 nnz_max = ai[n]+2*n*dtcount+2; 3277 3278 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3279 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3280 3281 /* put together the new matrix */ 3282 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3283 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3284 b = (Mat_SeqAIJ*)B->data; 3285 b->free_a = PETSC_TRUE; 3286 b->free_ij = PETSC_TRUE; 3287 b->singlemalloc = PETSC_FALSE; 3288 b->a = ba; 3289 b->j = bj; 3290 b->i = bi; 3291 b->diag = bdiag; 3292 b->ilen = 0; 3293 b->imax = 0; 3294 b->row = isrow; 3295 b->col = iscol; 3296 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3297 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3298 b->icol = isicol; 3299 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3300 3301 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3302 b->maxnz = nnz_max; 3303 3304 B->factortype = MAT_FACTOR_ILUDT; 3305 B->info.factor_mallocs = 0; 3306 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3307 CHKMEMQ; 3308 /* ------- end of symbolic factorization ---------*/ 3309 3310 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3311 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3312 ics = ic; 3313 3314 /* linked list for storing column indices of the active row */ 3315 nlnk = n + 1; 3316 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3317 3318 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3319 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3320 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3321 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3322 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3323 3324 bi[0] = 0; 3325 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3326 bdiag_rev[n] = bdiag[0]; 3327 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3328 for (i=0; i<n; i++) { 3329 /* copy initial fill into linked list */ 3330 nzi = 0; /* nonzeros for active row i */ 3331 nzi = ai[r[i]+1] - ai[r[i]]; 3332 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3333 nzi_al = adiag[r[i]] - ai[r[i]]; 3334 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3335 ajtmp = aj + ai[r[i]]; 3336 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3337 3338 /* load in initial (unfactored row) */ 3339 aatmp = a->a + ai[r[i]]; 3340 for (j=0; j<nzi; j++) { 3341 rtmp[ics[*ajtmp++]] = *aatmp++; 3342 } 3343 3344 /* add pivot rows into linked list */ 3345 row = lnk[n]; 3346 while (row < i ) { 3347 nzi_bl = bi[row+1] - bi[row] + 1; 3348 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3349 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3350 nzi += nlnk; 3351 row = lnk[row]; 3352 } 3353 3354 /* copy data from lnk into jtmp, then initialize lnk */ 3355 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3356 3357 /* numerical factorization */ 3358 bjtmp = jtmp; 3359 row = *bjtmp++; /* 1st pivot row */ 3360 while ( row < i ) { 3361 pc = rtmp + row; 3362 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3363 multiplier = (*pc) * (*pv); 3364 *pc = multiplier; 3365 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3366 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3367 pv = ba + bdiag[row+1] + 1; 3368 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3369 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3370 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3371 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3372 } 3373 row = *bjtmp++; 3374 } 3375 3376 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3377 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3378 nzi_bl = 0; j = 0; 3379 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3380 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3381 nzi_bl++; j++; 3382 } 3383 nzi_bu = nzi - nzi_bl -1; 3384 while (j < nzi){ 3385 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3386 j++; 3387 } 3388 3389 bjtmp = bj + bi[i]; 3390 batmp = ba + bi[i]; 3391 /* apply level dropping rule to L part */ 3392 ncut = nzi_al + dtcount; 3393 if (ncut < nzi_bl){ 3394 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3395 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3396 } else { 3397 ncut = nzi_bl; 3398 } 3399 for (j=0; j<ncut; j++){ 3400 bjtmp[j] = jtmp[j]; 3401 batmp[j] = vtmp[j]; 3402 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3403 } 3404 bi[i+1] = bi[i] + ncut; 3405 nzi = ncut + 1; 3406 3407 /* apply level dropping rule to U part */ 3408 ncut = nzi_au + dtcount; 3409 if (ncut < nzi_bu){ 3410 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3411 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3412 } else { 3413 ncut = nzi_bu; 3414 } 3415 nzi += ncut; 3416 3417 /* mark bdiagonal */ 3418 bdiag[i+1] = bdiag[i] - (ncut + 1); 3419 bdiag_rev[n-i-1] = bdiag[i+1]; 3420 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3421 bjtmp = bj + bdiag[i]; 3422 batmp = ba + bdiag[i]; 3423 *bjtmp = i; 3424 *batmp = diag_tmp; /* rtmp[i]; */ 3425 if (*batmp == 0.0) { 3426 *batmp = dt+shift; 3427 /* printf(" row %d add shift %g\n",i,shift); */ 3428 } 3429 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3430 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3431 3432 bjtmp = bj + bdiag[i+1]+1; 3433 batmp = ba + bdiag[i+1]+1; 3434 for (k=0; k<ncut; k++){ 3435 bjtmp[k] = jtmp[nzi_bl+1+k]; 3436 batmp[k] = vtmp[nzi_bl+1+k]; 3437 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3438 } 3439 /* printf("\n"); */ 3440 3441 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3442 /* 3443 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3444 printf(" ----------------------------\n"); 3445 */ 3446 } /* for (i=0; i<n; i++) */ 3447 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3448 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3449 3450 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3451 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3452 3453 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3454 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3455 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3456 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3457 3458 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3459 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3460 3461 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3462 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3463 if (row_identity && icol_identity) { 3464 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3465 } else { 3466 B->ops->solve = MatSolve_SeqAIJ; 3467 } 3468 3469 B->ops->solveadd = 0; 3470 B->ops->solvetranspose = 0; 3471 B->ops->solvetransposeadd = 0; 3472 B->ops->matsolve = 0; 3473 B->assembled = PETSC_TRUE; 3474 B->preallocated = PETSC_TRUE; 3475 PetscFunctionReturn(0); 3476 } 3477 3478 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3481 /* 3482 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3483 */ 3484 3485 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3486 { 3487 PetscErrorCode ierr; 3488 3489 PetscFunctionBegin; 3490 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 /* 3495 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3496 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3497 */ 3498 #undef __FUNCT__ 3499 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3500 /* 3501 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3502 */ 3503 3504 PetscErrorCode PETSCMAT_DLLEXPORT MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3505 { 3506 Mat C=fact; 3507 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3508 IS isrow = b->row,isicol = b->icol; 3509 PetscErrorCode ierr; 3510 const PetscInt *r,*ic,*ics; 3511 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3512 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3513 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3514 PetscReal dt=info->dt,shift=info->shiftamount; 3515 PetscTruth row_identity, col_identity; 3516 3517 PetscFunctionBegin; 3518 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3519 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3520 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3521 ics = ic; 3522 3523 for (i=0; i<n; i++){ 3524 /* initialize rtmp array */ 3525 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3526 bjtmp = bj + bi[i]; 3527 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3528 rtmp[i] = 0.0; 3529 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3530 bjtmp = bj + bdiag[i+1] + 1; 3531 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3532 3533 /* load in initial unfactored row of A */ 3534 /* printf("row %d\n",i); */ 3535 nz = ai[r[i]+1] - ai[r[i]]; 3536 ajtmp = aj + ai[r[i]]; 3537 v = aa + ai[r[i]]; 3538 for (j=0; j<nz; j++) { 3539 rtmp[ics[*ajtmp++]] = v[j]; 3540 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3541 } 3542 /* printf("\n"); */ 3543 3544 /* numerical factorization */ 3545 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3546 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3547 k = 0; 3548 while (k < nzl){ 3549 row = *bjtmp++; 3550 /* printf(" prow %d\n",row); */ 3551 pc = rtmp + row; 3552 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3553 multiplier = (*pc) * (*pv); 3554 *pc = multiplier; 3555 if (PetscAbsScalar(multiplier) > dt){ 3556 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3557 pv = b->a + bdiag[row+1] + 1; 3558 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3559 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3560 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 3561 } 3562 k++; 3563 } 3564 3565 /* finished row so stick it into b->a */ 3566 /* L-part */ 3567 pv = b->a + bi[i] ; 3568 pj = bj + bi[i] ; 3569 nzl = bi[i+1] - bi[i]; 3570 for (j=0; j<nzl; j++) { 3571 pv[j] = rtmp[pj[j]]; 3572 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3573 } 3574 3575 /* diagonal: invert diagonal entries for simplier triangular solves */ 3576 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3577 b->a[bdiag[i]] = 1.0/rtmp[i]; 3578 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3579 3580 /* U-part */ 3581 pv = b->a + bdiag[i+1] + 1; 3582 pj = bj + bdiag[i+1] + 1; 3583 nzu = bdiag[i] - bdiag[i+1] - 1; 3584 for (j=0; j<nzu; j++) { 3585 pv[j] = rtmp[pj[j]]; 3586 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3587 } 3588 /* printf("\n"); */ 3589 } 3590 3591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3594 3595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3597 if (row_identity && col_identity) { 3598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3599 } else { 3600 C->ops->solve = MatSolve_SeqAIJ; 3601 } 3602 C->ops->solveadd = 0; 3603 C->ops->solvetranspose = 0; 3604 C->ops->solvetransposeadd = 0; 3605 C->ops->matsolve = 0; 3606 C->assembled = PETSC_TRUE; 3607 C->preallocated = PETSC_TRUE; 3608 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3609 PetscFunctionReturn(0); 3610 } 3611