1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 32 else past = PetscAbsScalar(aa[j]); 33 } 34 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 35 if (past/future > best) { 36 best = past/future; 37 current = i; 38 } 39 } 40 41 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 42 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 43 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 44 order[0] = current; 45 for (i=0; i<n-1; i++) { 46 done[current] = PETSC_TRUE; 47 best = -1; 48 /* loop over all neighbors of current pivot */ 49 for (j=ai[current]; j<ai[current+1]; j++) { 50 jj = aj[j]; 51 if (done[jj]) continue; 52 /* loop over columns of potential next row computing weights for below and above diagonal */ 53 past = future = 0.0; 54 for (k=ai[jj]; k<ai[jj+1]; k++) { 55 kk = aj[k]; 56 if (done[kk]) past += PetscAbsScalar(aa[k]); 57 else if (kk != jj) future += PetscAbsScalar(aa[k]); 58 } 59 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 60 if (past/future > best) { 61 best = past/future; 62 newcurrent = jj; 63 } 64 } 65 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 66 best = -1; 67 for (k=0; k<n; k++) { 68 if (done[k]) continue; 69 future = 0.0; 70 past = 0.0; 71 for (j=ai[k]; j<ai[k+1]; j++) { 72 kk = aj[j]; 73 if (done[kk]) past += PetscAbsScalar(aa[j]); 74 else if (kk != k) future += PetscAbsScalar(aa[j]); 75 } 76 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 77 if (past/future > best) { 78 best = past/future; 79 newcurrent = k; 80 } 81 } 82 } 83 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 84 current = newcurrent; 85 order[i+1] = current; 86 } 87 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 88 *icol = *irow; 89 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 90 ierr = PetscFree(done);CHKERRQ(ierr); 91 ierr = PetscFree(order);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 EXTERN_C_END 95 96 EXTERN_C_BEGIN 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 100 { 101 PetscFunctionBegin; 102 *flg = PETSC_TRUE; 103 PetscFunctionReturn(0); 104 } 105 EXTERN_C_END 106 107 EXTERN_C_BEGIN 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 111 { 112 PetscInt n = A->rmap->n; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 #if defined(PETSC_USE_COMPLEX) 117 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 118 #endif 119 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 120 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 121 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 122 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 123 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 124 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 125 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 126 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 127 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 128 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 129 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 130 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 131 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 132 (*B)->factortype = ftype; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 139 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 140 { 141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 142 IS isicol; 143 PetscErrorCode ierr; 144 const PetscInt *r,*ic; 145 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 146 PetscInt *bi,*bj,*ajtmp; 147 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 148 PetscReal f; 149 PetscInt nlnk,*lnk,k,**bi_ptr; 150 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 151 PetscBT lnkbt; 152 153 PetscFunctionBegin; 154 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 155 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 156 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 157 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 158 159 /* get new row pointers */ 160 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 161 bi[0] = 0; 162 163 /* bdiag is location of diagonal in factor */ 164 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 165 bdiag[0] = 0; 166 167 /* linked list for storing column indices of the active row */ 168 nlnk = n + 1; 169 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 170 171 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 172 173 /* initial FreeSpace size is f*(ai[n]+1) */ 174 f = info->fill; 175 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 176 current_space = free_space; 177 178 for (i=0; i<n; i++) { 179 /* copy previous fill into linked list */ 180 nzi = 0; 181 nnz = ai[r[i]+1] - ai[r[i]]; 182 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 183 ajtmp = aj + ai[r[i]]; 184 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i+1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining<nzi) { 211 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 212 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 218 bi_ptr[i] = current_space->array; 219 current_space->array += nzi; 220 current_space->local_used += nzi; 221 current_space->local_remaining -= nzi; 222 } 223 #if defined(PETSC_USE_INFO) 224 if (ai[n] != 0) { 225 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 226 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 227 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 228 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 229 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 230 } else { 231 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 232 } 233 #endif 234 235 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 237 238 /* destroy list of free space and other temporary array(s) */ 239 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 240 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 241 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 242 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 243 244 /* put together the new matrix */ 245 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 246 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 247 b = (Mat_SeqAIJ*)(B)->data; 248 b->free_a = PETSC_TRUE; 249 b->free_ij = PETSC_TRUE; 250 b->singlemalloc = PETSC_FALSE; 251 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 252 b->j = bj; 253 b->i = bi; 254 b->diag = bdiag; 255 b->ilen = 0; 256 b->imax = 0; 257 b->row = isrow; 258 b->col = iscol; 259 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 261 b->icol = isicol; 262 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 263 264 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 266 b->maxnz = b->nz = bi[n] ; 267 268 (B)->factortype = MAT_FACTOR_LU; 269 (B)->info.factor_mallocs = reallocs; 270 (B)->info.fill_ratio_given = f; 271 272 if (ai[n]) { 273 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274 } else { 275 (B)->info.fill_ratio_needed = 0.0; 276 } 277 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278 if (a->inode.size) { 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287 { 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289 IS isicol; 290 PetscErrorCode ierr; 291 const PetscInt *r,*ic; 292 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 293 PetscInt *bi,*bj,*ajtmp; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298 PetscBT lnkbt; 299 300 PetscFunctionBegin; 301 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302 /* 303 PetscBool olddatastruct=PETSC_FALSE; 304 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 305 if (olddatastruct) { 306 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 */ 310 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 324 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 336 ajtmp = aj + ai[r[i]]; 337 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i+1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining<nzi) { 364 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 bi_ptr[i] = current_space->array; 372 current_space->array += nzi; 373 current_space->local_used += nzi; 374 current_space->local_remaining -= nzi; 375 } 376 377 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 378 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 379 380 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 381 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 382 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 383 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 384 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 385 386 /* put together the new matrix */ 387 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 388 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 389 b = (Mat_SeqAIJ*)(B)->data; 390 b->free_a = PETSC_TRUE; 391 b->free_ij = PETSC_TRUE; 392 b->singlemalloc = PETSC_FALSE; 393 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = 0; 398 b->imax = 0; 399 b->row = isrow; 400 b->col = iscol; 401 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 402 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 403 b->icol = isicol; 404 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 408 b->maxnz = b->nz = bdiag[0]+1; 409 B->factortype = MAT_FACTOR_LU; 410 B->info.factor_mallocs = reallocs; 411 B->info.fill_ratio_given = f; 412 413 if (ai[n]) { 414 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 415 } else { 416 B->info.fill_ratio_needed = 0.0; 417 } 418 #if defined(PETSC_USE_INFO) 419 if (ai[n] != 0) { 420 PetscReal af = B->info.fill_ratio_needed; 421 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 422 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 423 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 424 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 425 } else { 426 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 427 } 428 #endif 429 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 430 if (a->inode.size) { 431 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 432 } 433 PetscFunctionReturn(0); 434 } 435 436 /* 437 Trouble in factorization, should we dump the original matrix? 438 */ 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatFactorDumpMatrix" 441 PetscErrorCode MatFactorDumpMatrix(Mat A) 442 { 443 PetscErrorCode ierr; 444 PetscBool flg = PETSC_FALSE; 445 446 PetscFunctionBegin; 447 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 448 if (flg) { 449 PetscViewer viewer; 450 char filename[PETSC_MAX_PATH_LEN]; 451 452 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 453 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 454 ierr = MatView(A,viewer);CHKERRQ(ierr); 455 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 462 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 463 { 464 Mat C=B; 465 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 466 IS isrow = b->row,isicol = b->icol; 467 PetscErrorCode ierr; 468 const PetscInt *r,*ic,*ics; 469 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 470 PetscInt i,j,k,nz,nzL,row,*pj; 471 const PetscInt *ajtmp,*bjtmp; 472 MatScalar *rtmp,*pc,multiplier,*pv; 473 const MatScalar *aa=a->a,*v; 474 PetscBool row_identity,col_identity; 475 FactorShiftCtx sctx; 476 const PetscInt *ddiag; 477 PetscReal rs; 478 MatScalar d; 479 480 PetscFunctionBegin; 481 /* MatPivotSetUp(): initialize shift context sctx */ 482 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 483 484 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 485 ddiag = a->diag; 486 sctx.shift_top = info->zeropivot; 487 for (i=0; i<n; i++) { 488 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 489 d = (aa)[ddiag[i]]; 490 rs = -PetscAbsScalar(d) - PetscRealPart(d); 491 v = aa+ai[i]; 492 nz = ai[i+1] - ai[i]; 493 for (j=0; j<nz; j++) 494 rs += PetscAbsScalar(v[j]); 495 if (rs>sctx.shift_top) sctx.shift_top = rs; 496 } 497 sctx.shift_top *= 1.1; 498 sctx.nshift_max = 5; 499 sctx.shift_lo = 0.; 500 sctx.shift_hi = 1.; 501 } 502 503 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 504 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 505 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 506 ics = ic; 507 508 do { 509 sctx.newshift = PETSC_FALSE; 510 for (i=0; i<n; i++) { 511 /* zero rtmp */ 512 /* L part */ 513 nz = bi[i+1] - bi[i]; 514 bjtmp = bj + bi[i]; 515 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516 517 /* U part */ 518 nz = bdiag[i]-bdiag[i+1]; 519 bjtmp = bj + bdiag[i+1]+1; 520 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 521 522 /* load in initial (unfactored row) */ 523 nz = ai[r[i]+1] - ai[r[i]]; 524 ajtmp = aj + ai[r[i]]; 525 v = aa + ai[r[i]]; 526 for (j=0; j<nz; j++) { 527 rtmp[ics[ajtmp[j]]] = v[j]; 528 } 529 /* ZeropivotApply() */ 530 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 531 532 /* elimination */ 533 bjtmp = bj + bi[i]; 534 row = *bjtmp++; 535 nzL = bi[i+1] - bi[i]; 536 for (k=0; k < nzL;k++) { 537 pc = rtmp + row; 538 if (*pc != 0.0) { 539 pv = b->a + bdiag[row]; 540 multiplier = *pc * (*pv); 541 *pc = multiplier; 542 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 543 pv = b->a + bdiag[row+1]+1; 544 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 545 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 546 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 547 } 548 row = *bjtmp++; 549 } 550 551 /* finished row so stick it into b->a */ 552 rs = 0.0; 553 /* L part */ 554 pv = b->a + bi[i] ; 555 pj = b->j + bi[i] ; 556 nz = bi[i+1] - bi[i]; 557 for (j=0; j<nz; j++) { 558 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 559 } 560 561 /* U part */ 562 pv = b->a + bdiag[i+1]+1; 563 pj = b->j + bdiag[i+1]+1; 564 nz = bdiag[i] - bdiag[i+1]-1; 565 for (j=0; j<nz; j++) { 566 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 567 } 568 569 sctx.rs = rs; 570 sctx.pv = rtmp[i]; 571 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 572 if (sctx.newshift) break; /* break for-loop */ 573 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 574 575 /* Mark diagonal and invert diagonal for simplier triangular solves */ 576 pv = b->a + bdiag[i]; 577 *pv = 1.0/rtmp[i]; 578 579 } /* endof for (i=0; i<n; i++) { */ 580 581 /* MatPivotRefine() */ 582 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 583 /* 584 * if no shift in this attempt & shifting & started shifting & can refine, 585 * then try lower shift 586 */ 587 sctx.shift_hi = sctx.shift_fraction; 588 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 589 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 590 sctx.newshift = PETSC_TRUE; 591 sctx.nshift++; 592 } 593 } while (sctx.newshift); 594 595 ierr = PetscFree(rtmp);CHKERRQ(ierr); 596 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 597 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 598 599 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 600 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 601 if (row_identity && col_identity) { 602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603 } else { 604 C->ops->solve = MatSolve_SeqAIJ; 605 } 606 C->ops->solveadd = MatSolveAdd_SeqAIJ; 607 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 608 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 609 C->ops->matsolve = MatMatSolve_SeqAIJ; 610 C->assembled = PETSC_TRUE; 611 C->preallocated = PETSC_TRUE; 612 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 613 614 /* MatShiftView(A,info,&sctx) */ 615 if (sctx.nshift) { 616 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 622 } 623 } 624 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 630 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 631 { 632 Mat C=B; 633 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 634 IS isrow = b->row,isicol = b->icol; 635 PetscErrorCode ierr; 636 const PetscInt *r,*ic,*ics; 637 PetscInt nz,row,i,j,n=A->rmap->n,diag; 638 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 639 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 640 MatScalar *pv,*rtmp,*pc,multiplier,d; 641 const MatScalar *v,*aa=a->a; 642 PetscReal rs=0.0; 643 FactorShiftCtx sctx; 644 const PetscInt *ddiag; 645 PetscBool row_identity, col_identity; 646 647 PetscFunctionBegin; 648 /* MatPivotSetUp(): initialize shift context sctx */ 649 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 650 651 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 652 ddiag = a->diag; 653 sctx.shift_top = info->zeropivot; 654 for (i=0; i<n; i++) { 655 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 656 d = (aa)[ddiag[i]]; 657 rs = -PetscAbsScalar(d) - PetscRealPart(d); 658 v = aa+ai[i]; 659 nz = ai[i+1] - ai[i]; 660 for (j=0; j<nz; j++) 661 rs += PetscAbsScalar(v[j]); 662 if (rs>sctx.shift_top) sctx.shift_top = rs; 663 } 664 sctx.shift_top *= 1.1; 665 sctx.nshift_max = 5; 666 sctx.shift_lo = 0.; 667 sctx.shift_hi = 1.; 668 } 669 670 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 671 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 672 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 673 ics = ic; 674 675 do { 676 sctx.newshift = PETSC_FALSE; 677 for (i=0; i<n; i++) { 678 nz = bi[i+1] - bi[i]; 679 bjtmp = bj + bi[i]; 680 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 681 682 /* load in initial (unfactored row) */ 683 nz = ai[r[i]+1] - ai[r[i]]; 684 ajtmp = aj + ai[r[i]]; 685 v = aa + ai[r[i]]; 686 for (j=0; j<nz; j++) { 687 rtmp[ics[ajtmp[j]]] = v[j]; 688 } 689 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690 691 row = *bjtmp++; 692 while (row < i) { 693 pc = rtmp + row; 694 if (*pc != 0.0) { 695 pv = b->a + diag_offset[row]; 696 pj = b->j + diag_offset[row] + 1; 697 multiplier = *pc / *pv++; 698 *pc = multiplier; 699 nz = bi[row+1] - diag_offset[row] - 1; 700 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 701 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 702 } 703 row = *bjtmp++; 704 } 705 /* finished row so stick it into b->a */ 706 pv = b->a + bi[i] ; 707 pj = b->j + bi[i] ; 708 nz = bi[i+1] - bi[i]; 709 diag = diag_offset[i] - bi[i]; 710 rs = 0.0; 711 for (j=0; j<nz; j++) { 712 pv[j] = rtmp[pj[j]]; 713 rs += PetscAbsScalar(pv[j]); 714 } 715 rs -= PetscAbsScalar(pv[diag]); 716 717 sctx.rs = rs; 718 sctx.pv = pv[diag]; 719 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 720 if (sctx.newshift) break; 721 pv[diag] = sctx.pv; 722 } 723 724 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 725 /* 726 * if no shift in this attempt & shifting & started shifting & can refine, 727 * then try lower shift 728 */ 729 sctx.shift_hi = sctx.shift_fraction; 730 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 731 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 732 sctx.newshift = PETSC_TRUE; 733 sctx.nshift++; 734 } 735 } while (sctx.newshift); 736 737 /* invert diagonal entries for simplier triangular solves */ 738 for (i=0; i<n; i++) { 739 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 740 } 741 ierr = PetscFree(rtmp);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 744 745 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 746 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 747 if (row_identity && col_identity) { 748 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 749 } else { 750 C->ops->solve = MatSolve_SeqAIJ_inplace; 751 } 752 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756 C->assembled = PETSC_TRUE; 757 C->preallocated = PETSC_TRUE; 758 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 759 if (sctx.nshift) { 760 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 761 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 762 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 763 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 764 } 765 } 766 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 767 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 768 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 /* 773 This routine implements inplace ILU(0) with row or/and column permutations. 774 Input: 775 A - original matrix 776 Output; 777 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 778 a->j (col index) is permuted by the inverse of colperm, then sorted 779 a->a reordered accordingly with a->j 780 a->diag (ptr to diagonal elements) is updated. 781 */ 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 785 { 786 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 787 IS isrow = a->row,isicol = a->icol; 788 PetscErrorCode ierr; 789 const PetscInt *r,*ic,*ics; 790 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 791 PetscInt *ajtmp,nz,row; 792 PetscInt *diag = a->diag,nbdiag,*pj; 793 PetscScalar *rtmp,*pc,multiplier,d; 794 MatScalar *pv,*v; 795 PetscReal rs; 796 FactorShiftCtx sctx; 797 const MatScalar *aa=a->a,*vtmp; 798 799 PetscFunctionBegin; 800 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 801 802 /* MatPivotSetUp(): initialize shift context sctx */ 803 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 804 805 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 806 const PetscInt *ddiag = a->diag; 807 sctx.shift_top = info->zeropivot; 808 for (i=0; i<n; i++) { 809 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 810 d = (aa)[ddiag[i]]; 811 rs = -PetscAbsScalar(d) - PetscRealPart(d); 812 vtmp = aa+ai[i]; 813 nz = ai[i+1] - ai[i]; 814 for (j=0; j<nz; j++) 815 rs += PetscAbsScalar(vtmp[j]); 816 if (rs>sctx.shift_top) sctx.shift_top = rs; 817 } 818 sctx.shift_top *= 1.1; 819 sctx.nshift_max = 5; 820 sctx.shift_lo = 0.; 821 sctx.shift_hi = 1.; 822 } 823 824 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 825 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 826 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 827 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 828 ics = ic; 829 830 #if defined(MV) 831 sctx.shift_top = 0.; 832 sctx.nshift_max = 0; 833 sctx.shift_lo = 0.; 834 sctx.shift_hi = 0.; 835 sctx.shift_fraction = 0.; 836 837 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 838 sctx.shift_top = 0.; 839 for (i=0; i<n; i++) { 840 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 841 d = (a->a)[diag[i]]; 842 rs = -PetscAbsScalar(d) - PetscRealPart(d); 843 v = a->a+ai[i]; 844 nz = ai[i+1] - ai[i]; 845 for (j=0; j<nz; j++) 846 rs += PetscAbsScalar(v[j]); 847 if (rs>sctx.shift_top) sctx.shift_top = rs; 848 } 849 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 850 sctx.shift_top *= 1.1; 851 sctx.nshift_max = 5; 852 sctx.shift_lo = 0.; 853 sctx.shift_hi = 1.; 854 } 855 856 sctx.shift_amount = 0.; 857 sctx.nshift = 0; 858 #endif 859 860 do { 861 sctx.newshift = PETSC_FALSE; 862 for (i=0; i<n; i++) { 863 /* load in initial unfactored row */ 864 nz = ai[r[i]+1] - ai[r[i]]; 865 ajtmp = aj + ai[r[i]]; 866 v = a->a + ai[r[i]]; 867 /* sort permuted ajtmp and values v accordingly */ 868 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 869 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 870 871 diag[r[i]] = ai[r[i]]; 872 for (j=0; j<nz; j++) { 873 rtmp[ajtmp[j]] = v[j]; 874 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 875 } 876 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 877 878 row = *ajtmp++; 879 while (row < i) { 880 pc = rtmp + row; 881 if (*pc != 0.0) { 882 pv = a->a + diag[r[row]]; 883 pj = aj + diag[r[row]] + 1; 884 885 multiplier = *pc / *pv++; 886 *pc = multiplier; 887 nz = ai[r[row]+1] - diag[r[row]] - 1; 888 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 889 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 890 } 891 row = *ajtmp++; 892 } 893 /* finished row so overwrite it onto a->a */ 894 pv = a->a + ai[r[i]] ; 895 pj = aj + ai[r[i]] ; 896 nz = ai[r[i]+1] - ai[r[i]]; 897 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 898 899 rs = 0.0; 900 for (j=0; j<nz; j++) { 901 pv[j] = rtmp[pj[j]]; 902 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 903 } 904 905 sctx.rs = rs; 906 sctx.pv = pv[nbdiag]; 907 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 908 if (sctx.newshift) break; 909 pv[nbdiag] = sctx.pv; 910 } 911 912 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 913 /* 914 * if no shift in this attempt & shifting & started shifting & can refine, 915 * then try lower shift 916 */ 917 sctx.shift_hi = sctx.shift_fraction; 918 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 919 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 920 sctx.newshift = PETSC_TRUE; 921 sctx.nshift++; 922 } 923 } while (sctx.newshift); 924 925 /* invert diagonal entries for simplier triangular solves */ 926 for (i=0; i<n; i++) { 927 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 928 } 929 930 ierr = PetscFree(rtmp);CHKERRQ(ierr); 931 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 932 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 933 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 934 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 935 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 936 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 937 A->assembled = PETSC_TRUE; 938 A->preallocated = PETSC_TRUE; 939 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 940 if (sctx.nshift) { 941 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 942 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 943 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 944 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 /* ----------------------------------------------------------- */ 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatLUFactor_SeqAIJ" 953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 954 { 955 PetscErrorCode ierr; 956 Mat C; 957 958 PetscFunctionBegin; 959 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 960 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 961 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 962 A->ops->solve = C->ops->solve; 963 A->ops->solvetranspose = C->ops->solvetranspose; 964 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 965 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 /* ----------------------------------------------------------- */ 969 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 973 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 974 { 975 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 976 IS iscol = a->col,isrow = a->row; 977 PetscErrorCode ierr; 978 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 979 PetscInt nz; 980 const PetscInt *rout,*cout,*r,*c; 981 PetscScalar *x,*tmp,*tmps,sum; 982 const PetscScalar *b; 983 const MatScalar *aa = a->a,*v; 984 985 PetscFunctionBegin; 986 if (!n) PetscFunctionReturn(0); 987 988 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 tmp = a->solve_work; 991 992 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 993 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 994 995 /* forward solve the lower triangular */ 996 tmp[0] = b[*r++]; 997 tmps = tmp; 998 for (i=1; i<n; i++) { 999 v = aa + ai[i] ; 1000 vi = aj + ai[i] ; 1001 nz = a->diag[i] - ai[i]; 1002 sum = b[*r++]; 1003 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1004 tmp[i] = sum; 1005 } 1006 1007 /* backward solve the upper triangular */ 1008 for (i=n-1; i>=0; i--) { 1009 v = aa + a->diag[i] + 1; 1010 vi = aj + a->diag[i] + 1; 1011 nz = ai[i+1] - a->diag[i] - 1; 1012 sum = tmp[i]; 1013 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1014 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1015 } 1016 1017 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1018 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1019 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1020 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1027 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1028 { 1029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1030 IS iscol = a->col,isrow = a->row; 1031 PetscErrorCode ierr; 1032 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1033 PetscInt nz,neq; 1034 const PetscInt *rout,*cout,*r,*c; 1035 PetscScalar *x,*b,*tmp,*tmps,sum; 1036 const MatScalar *aa = a->a,*v; 1037 PetscBool bisdense,xisdense; 1038 1039 PetscFunctionBegin; 1040 if (!n) PetscFunctionReturn(0); 1041 1042 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1043 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1044 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1045 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1046 1047 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1048 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1049 1050 tmp = a->solve_work; 1051 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1052 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1053 1054 for (neq=0; neq<B->cmap->n; neq++) { 1055 /* forward solve the lower triangular */ 1056 tmp[0] = b[r[0]]; 1057 tmps = tmp; 1058 for (i=1; i<n; i++) { 1059 v = aa + ai[i] ; 1060 vi = aj + ai[i] ; 1061 nz = a->diag[i] - ai[i]; 1062 sum = b[r[i]]; 1063 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1064 tmp[i] = sum; 1065 } 1066 /* backward solve the upper triangular */ 1067 for (i=n-1; i>=0; i--) { 1068 v = aa + a->diag[i] + 1; 1069 vi = aj + a->diag[i] + 1; 1070 nz = ai[i+1] - a->diag[i] - 1; 1071 sum = tmp[i]; 1072 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1073 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1074 } 1075 1076 b += n; 1077 x += n; 1078 } 1079 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1080 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1081 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1082 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1083 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1089 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1090 { 1091 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1092 IS iscol = a->col,isrow = a->row; 1093 PetscErrorCode ierr; 1094 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1095 PetscInt nz,neq; 1096 const PetscInt *rout,*cout,*r,*c; 1097 PetscScalar *x,*b,*tmp,sum; 1098 const MatScalar *aa = a->a,*v; 1099 PetscBool bisdense,xisdense; 1100 1101 PetscFunctionBegin; 1102 if (!n) PetscFunctionReturn(0); 1103 1104 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1105 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1106 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1107 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1108 1109 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1110 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1111 1112 tmp = a->solve_work; 1113 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1114 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1115 1116 for (neq=0; neq<B->cmap->n; neq++) { 1117 /* forward solve the lower triangular */ 1118 tmp[0] = b[r[0]]; 1119 v = aa; 1120 vi = aj; 1121 for (i=1; i<n; i++) { 1122 nz = ai[i+1] - ai[i]; 1123 sum = b[r[i]]; 1124 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1125 tmp[i] = sum; 1126 v += nz; vi += nz; 1127 } 1128 1129 /* backward solve the upper triangular */ 1130 for (i=n-1; i>=0; i--) { 1131 v = aa + adiag[i+1]+1; 1132 vi = aj + adiag[i+1]+1; 1133 nz = adiag[i]-adiag[i+1]-1; 1134 sum = tmp[i]; 1135 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1136 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1137 } 1138 1139 b += n; 1140 x += n; 1141 } 1142 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1143 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1144 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1145 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1146 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1152 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1153 { 1154 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1155 IS iscol = a->col,isrow = a->row; 1156 PetscErrorCode ierr; 1157 const PetscInt *r,*c,*rout,*cout; 1158 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1159 PetscInt nz,row; 1160 PetscScalar *x,*b,*tmp,*tmps,sum; 1161 const MatScalar *aa = a->a,*v; 1162 1163 PetscFunctionBegin; 1164 if (!n) PetscFunctionReturn(0); 1165 1166 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1167 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1168 tmp = a->solve_work; 1169 1170 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1171 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1172 1173 /* forward solve the lower triangular */ 1174 tmp[0] = b[*r++]; 1175 tmps = tmp; 1176 for (row=1; row<n; row++) { 1177 i = rout[row]; /* permuted row */ 1178 v = aa + ai[i] ; 1179 vi = aj + ai[i] ; 1180 nz = a->diag[i] - ai[i]; 1181 sum = b[*r++]; 1182 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1183 tmp[row] = sum; 1184 } 1185 1186 /* backward solve the upper triangular */ 1187 for (row=n-1; row>=0; row--) { 1188 i = rout[row]; /* permuted row */ 1189 v = aa + a->diag[i] + 1; 1190 vi = aj + a->diag[i] + 1; 1191 nz = ai[i+1] - a->diag[i] - 1; 1192 sum = tmp[row]; 1193 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1194 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1195 } 1196 1197 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1198 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1201 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 /* ----------------------------------------------------------- */ 1206 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1209 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1210 { 1211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 PetscInt n = A->rmap->n; 1214 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1215 PetscScalar *x; 1216 const PetscScalar *b; 1217 const MatScalar *aa = a->a; 1218 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1219 PetscInt adiag_i,i,nz,ai_i; 1220 const PetscInt *vi; 1221 const MatScalar *v; 1222 PetscScalar sum; 1223 #endif 1224 1225 PetscFunctionBegin; 1226 if (!n) PetscFunctionReturn(0); 1227 1228 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1229 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1230 1231 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1232 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1233 #else 1234 /* forward solve the lower triangular */ 1235 x[0] = b[0]; 1236 for (i=1; i<n; i++) { 1237 ai_i = ai[i]; 1238 v = aa + ai_i; 1239 vi = aj + ai_i; 1240 nz = adiag[i] - ai_i; 1241 sum = b[i]; 1242 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1243 x[i] = sum; 1244 } 1245 1246 /* backward solve the upper triangular */ 1247 for (i=n-1; i>=0; i--) { 1248 adiag_i = adiag[i]; 1249 v = aa + adiag_i + 1; 1250 vi = aj + adiag_i + 1; 1251 nz = ai[i+1] - adiag_i - 1; 1252 sum = x[i]; 1253 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1254 x[i] = sum*aa[adiag_i]; 1255 } 1256 #endif 1257 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1258 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1259 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1265 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1266 { 1267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1268 IS iscol = a->col,isrow = a->row; 1269 PetscErrorCode ierr; 1270 PetscInt i, n = A->rmap->n,j; 1271 PetscInt nz; 1272 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1273 PetscScalar *x,*tmp,sum; 1274 const PetscScalar *b; 1275 const MatScalar *aa = a->a,*v; 1276 1277 PetscFunctionBegin; 1278 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1279 1280 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1281 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1282 tmp = a->solve_work; 1283 1284 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1285 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1286 1287 /* forward solve the lower triangular */ 1288 tmp[0] = b[*r++]; 1289 for (i=1; i<n; i++) { 1290 v = aa + ai[i] ; 1291 vi = aj + ai[i] ; 1292 nz = a->diag[i] - ai[i]; 1293 sum = b[*r++]; 1294 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1295 tmp[i] = sum; 1296 } 1297 1298 /* backward solve the upper triangular */ 1299 for (i=n-1; i>=0; i--) { 1300 v = aa + a->diag[i] + 1; 1301 vi = aj + a->diag[i] + 1; 1302 nz = ai[i+1] - a->diag[i] - 1; 1303 sum = tmp[i]; 1304 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1305 tmp[i] = sum*aa[a->diag[i]]; 1306 x[*c--] += tmp[i]; 1307 } 1308 1309 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1310 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1311 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1312 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1313 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1314 1315 PetscFunctionReturn(0); 1316 } 1317 1318 #undef __FUNCT__ 1319 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1320 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1321 { 1322 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1323 IS iscol = a->col,isrow = a->row; 1324 PetscErrorCode ierr; 1325 PetscInt i, n = A->rmap->n,j; 1326 PetscInt nz; 1327 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1328 PetscScalar *x,*tmp,sum; 1329 const PetscScalar *b; 1330 const MatScalar *aa = a->a,*v; 1331 1332 PetscFunctionBegin; 1333 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1334 1335 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1336 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1337 tmp = a->solve_work; 1338 1339 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1340 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1341 1342 /* forward solve the lower triangular */ 1343 tmp[0] = b[r[0]]; 1344 v = aa; 1345 vi = aj; 1346 for (i=1; i<n; i++) { 1347 nz = ai[i+1] - ai[i]; 1348 sum = b[r[i]]; 1349 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1350 tmp[i] = sum; 1351 v += nz; vi += nz; 1352 } 1353 1354 /* backward solve the upper triangular */ 1355 v = aa + adiag[n-1]; 1356 vi = aj + adiag[n-1]; 1357 for (i=n-1; i>=0; i--) { 1358 nz = adiag[i] - adiag[i+1] - 1; 1359 sum = tmp[i]; 1360 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1361 tmp[i] = sum*v[nz]; 1362 x[c[i]] += tmp[i]; 1363 v += nz+1; vi += nz+1; 1364 } 1365 1366 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1367 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1368 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1369 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1370 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1371 1372 PetscFunctionReturn(0); 1373 } 1374 1375 #undef __FUNCT__ 1376 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1377 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1378 { 1379 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1380 IS iscol = a->col,isrow = a->row; 1381 PetscErrorCode ierr; 1382 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1383 PetscInt i,n = A->rmap->n,j; 1384 PetscInt nz; 1385 PetscScalar *x,*tmp,s1; 1386 const MatScalar *aa = a->a,*v; 1387 const PetscScalar *b; 1388 1389 PetscFunctionBegin; 1390 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1391 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1392 tmp = a->solve_work; 1393 1394 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1395 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1396 1397 /* copy the b into temp work space according to permutation */ 1398 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1399 1400 /* forward solve the U^T */ 1401 for (i=0; i<n; i++) { 1402 v = aa + diag[i] ; 1403 vi = aj + diag[i] + 1; 1404 nz = ai[i+1] - diag[i] - 1; 1405 s1 = tmp[i]; 1406 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1407 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1408 tmp[i] = s1; 1409 } 1410 1411 /* backward solve the L^T */ 1412 for (i=n-1; i>=0; i--) { 1413 v = aa + diag[i] - 1 ; 1414 vi = aj + diag[i] - 1 ; 1415 nz = diag[i] - ai[i]; 1416 s1 = tmp[i]; 1417 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1418 } 1419 1420 /* copy tmp into x according to permutation */ 1421 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1422 1423 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1424 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1425 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1426 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1427 1428 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1434 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1435 { 1436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1437 IS iscol = a->col,isrow = a->row; 1438 PetscErrorCode ierr; 1439 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1440 PetscInt i,n = A->rmap->n,j; 1441 PetscInt nz; 1442 PetscScalar *x,*tmp,s1; 1443 const MatScalar *aa = a->a,*v; 1444 const PetscScalar *b; 1445 1446 PetscFunctionBegin; 1447 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1448 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1449 tmp = a->solve_work; 1450 1451 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1452 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1453 1454 /* copy the b into temp work space according to permutation */ 1455 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1456 1457 /* forward solve the U^T */ 1458 for (i=0; i<n; i++) { 1459 v = aa + adiag[i+1] + 1; 1460 vi = aj + adiag[i+1] + 1; 1461 nz = adiag[i] - adiag[i+1] - 1; 1462 s1 = tmp[i]; 1463 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1464 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1465 tmp[i] = s1; 1466 } 1467 1468 /* backward solve the L^T */ 1469 for (i=n-1; i>=0; i--) { 1470 v = aa + ai[i]; 1471 vi = aj + ai[i]; 1472 nz = ai[i+1] - ai[i]; 1473 s1 = tmp[i]; 1474 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1475 } 1476 1477 /* copy tmp into x according to permutation */ 1478 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1479 1480 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1481 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1482 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1483 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1484 1485 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1491 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1492 { 1493 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1494 IS iscol = a->col,isrow = a->row; 1495 PetscErrorCode ierr; 1496 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1497 PetscInt i,n = A->rmap->n,j; 1498 PetscInt nz; 1499 PetscScalar *x,*tmp,s1; 1500 const MatScalar *aa = a->a,*v; 1501 const PetscScalar *b; 1502 1503 PetscFunctionBegin; 1504 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1505 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1506 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1507 tmp = a->solve_work; 1508 1509 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1510 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1511 1512 /* copy the b into temp work space according to permutation */ 1513 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1514 1515 /* forward solve the U^T */ 1516 for (i=0; i<n; i++) { 1517 v = aa + diag[i] ; 1518 vi = aj + diag[i] + 1; 1519 nz = ai[i+1] - diag[i] - 1; 1520 s1 = tmp[i]; 1521 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1522 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1523 tmp[i] = s1; 1524 } 1525 1526 /* backward solve the L^T */ 1527 for (i=n-1; i>=0; i--) { 1528 v = aa + diag[i] - 1 ; 1529 vi = aj + diag[i] - 1 ; 1530 nz = diag[i] - ai[i]; 1531 s1 = tmp[i]; 1532 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1533 } 1534 1535 /* copy tmp into x according to permutation */ 1536 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1537 1538 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1539 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1540 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1541 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1542 1543 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1544 PetscFunctionReturn(0); 1545 } 1546 1547 #undef __FUNCT__ 1548 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1549 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1550 { 1551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1552 IS iscol = a->col,isrow = a->row; 1553 PetscErrorCode ierr; 1554 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1555 PetscInt i,n = A->rmap->n,j; 1556 PetscInt nz; 1557 PetscScalar *x,*tmp,s1; 1558 const MatScalar *aa = a->a,*v; 1559 const PetscScalar *b; 1560 1561 PetscFunctionBegin; 1562 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1563 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1564 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1565 tmp = a->solve_work; 1566 1567 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1568 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1569 1570 /* copy the b into temp work space according to permutation */ 1571 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1572 1573 /* forward solve the U^T */ 1574 for (i=0; i<n; i++) { 1575 v = aa + adiag[i+1] + 1; 1576 vi = aj + adiag[i+1] + 1; 1577 nz = adiag[i] - adiag[i+1] - 1; 1578 s1 = tmp[i]; 1579 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1580 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1581 tmp[i] = s1; 1582 } 1583 1584 1585 /* backward solve the L^T */ 1586 for (i=n-1; i>=0; i--) { 1587 v = aa + ai[i] ; 1588 vi = aj + ai[i]; 1589 nz = ai[i+1] - ai[i]; 1590 s1 = tmp[i]; 1591 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1592 } 1593 1594 /* copy tmp into x according to permutation */ 1595 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1596 1597 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1598 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1599 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1600 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1601 1602 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1603 PetscFunctionReturn(0); 1604 } 1605 1606 /* ----------------------------------------------------------------*/ 1607 1608 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1609 1610 /* 1611 ilu() under revised new data structure. 1612 Factored arrays bj and ba are stored as 1613 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1614 1615 bi=fact->i is an array of size n+1, in which 1616 bi+ 1617 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1618 bi[n]: points to L(n-1,n-1)+1 1619 1620 bdiag=fact->diag is an array of size n+1,in which 1621 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1622 bdiag[n]: points to entry of U(n-1,0)-1 1623 1624 U(i,:) contains bdiag[i] as its last entry, i.e., 1625 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1626 */ 1627 #undef __FUNCT__ 1628 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1629 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1630 { 1631 1632 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1633 PetscErrorCode ierr; 1634 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1635 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1636 PetscBool missing; 1637 IS isicol; 1638 1639 PetscFunctionBegin; 1640 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1641 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1642 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1643 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1644 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1645 b = (Mat_SeqAIJ*)(fact)->data; 1646 1647 /* allocate matrix arrays for new data structure */ 1648 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1649 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650 b->singlemalloc = PETSC_TRUE; 1651 if (!b->diag) { 1652 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1653 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1654 } 1655 bdiag = b->diag; 1656 1657 if (n > 0) { 1658 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1659 } 1660 1661 /* set bi and bj with new data structure */ 1662 bi = b->i; 1663 bj = b->j; 1664 1665 /* L part */ 1666 bi[0] = 0; 1667 for (i=0; i<n; i++) { 1668 nz = adiag[i] - ai[i]; 1669 bi[i+1] = bi[i] + nz; 1670 aj = a->j + ai[i]; 1671 for (j=0; j<nz; j++) { 1672 /* *bj = aj[j]; bj++; */ 1673 bj[k++] = aj[j]; 1674 } 1675 } 1676 1677 /* U part */ 1678 bdiag[n] = bi[n]-1; 1679 for (i=n-1; i>=0; i--) { 1680 nz = ai[i+1] - adiag[i] - 1; 1681 aj = a->j + adiag[i] + 1; 1682 for (j=0; j<nz; j++) { 1683 /* *bj = aj[j]; bj++; */ 1684 bj[k++] = aj[j]; 1685 } 1686 /* diag[i] */ 1687 /* *bj = i; bj++; */ 1688 bj[k++] = i; 1689 bdiag[i] = bdiag[i+1] + nz + 1; 1690 } 1691 1692 fact->factortype = MAT_FACTOR_ILU; 1693 fact->info.factor_mallocs = 0; 1694 fact->info.fill_ratio_given = info->fill; 1695 fact->info.fill_ratio_needed = 1.0; 1696 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1697 1698 b = (Mat_SeqAIJ*)(fact)->data; 1699 b->row = isrow; 1700 b->col = iscol; 1701 b->icol = isicol; 1702 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1703 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1704 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1710 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1711 { 1712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1713 IS isicol; 1714 PetscErrorCode ierr; 1715 const PetscInt *r,*ic; 1716 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1717 PetscInt *bi,*cols,nnz,*cols_lvl; 1718 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1719 PetscInt i,levels,diagonal_fill; 1720 PetscBool col_identity,row_identity; 1721 PetscReal f; 1722 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1723 PetscBT lnkbt; 1724 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1725 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1726 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1727 1728 PetscFunctionBegin; 1729 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1730 /* 1731 PetscBool olddatastruct=PETSC_FALSE; 1732 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1733 if (olddatastruct) { 1734 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 */ 1738 1739 levels = (PetscInt)info->levels; 1740 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1741 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1742 if (!levels && row_identity && col_identity) { 1743 /* special case: ilu(0) with natural ordering */ 1744 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1745 if (a->inode.size) { 1746 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1747 } 1748 PetscFunctionReturn(0); 1749 } 1750 1751 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1752 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1753 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1754 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1755 1756 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1757 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1758 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1759 bi[0] = bdiag[0] = 0; 1760 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1761 1762 /* create a linked list for storing column indices of the active row */ 1763 nlnk = n + 1; 1764 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1765 1766 /* initial FreeSpace size is f*(ai[n]+1) */ 1767 f = info->fill; 1768 diagonal_fill = (PetscInt)info->diagonal_fill; 1769 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1770 current_space = free_space; 1771 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1772 current_space_lvl = free_space_lvl; 1773 for (i=0; i<n; i++) { 1774 nzi = 0; 1775 /* copy current row into linked list */ 1776 nnz = ai[r[i]+1] - ai[r[i]]; 1777 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1778 cols = aj + ai[r[i]]; 1779 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1780 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1781 nzi += nlnk; 1782 1783 /* make sure diagonal entry is included */ 1784 if (diagonal_fill && lnk[i] == -1) { 1785 fm = n; 1786 while (lnk[fm] < i) fm = lnk[fm]; 1787 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1788 lnk[fm] = i; 1789 lnk_lvl[i] = 0; 1790 nzi++; dcount++; 1791 } 1792 1793 /* add pivot rows into the active row */ 1794 nzbd = 0; 1795 prow = lnk[n]; 1796 while (prow < i) { 1797 nnz = bdiag[prow]; 1798 cols = bj_ptr[prow] + nnz + 1; 1799 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1801 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1802 nzi += nlnk; 1803 prow = lnk[prow]; 1804 nzbd++; 1805 } 1806 bdiag[i] = nzbd; 1807 bi[i+1] = bi[i] + nzi; 1808 /* if free space is not available, make more free space */ 1809 if (current_space->local_remaining<nzi) { 1810 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1811 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1812 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1813 reallocs++; 1814 } 1815 1816 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1817 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1818 bj_ptr[i] = current_space->array; 1819 bjlvl_ptr[i] = current_space_lvl->array; 1820 1821 /* make sure the active row i has diagonal entry */ 1822 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1823 1824 current_space->array += nzi; 1825 current_space->local_used += nzi; 1826 current_space->local_remaining -= nzi; 1827 current_space_lvl->array += nzi; 1828 current_space_lvl->local_used += nzi; 1829 current_space_lvl->local_remaining -= nzi; 1830 } 1831 1832 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1833 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1834 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1835 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1836 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1837 1838 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1839 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1840 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1841 1842 #if defined(PETSC_USE_INFO) 1843 { 1844 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1845 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1846 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1847 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1848 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1849 if (diagonal_fill) { 1850 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1851 } 1852 } 1853 #endif 1854 /* put together the new matrix */ 1855 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1856 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1857 b = (Mat_SeqAIJ*)(fact)->data; 1858 b->free_a = PETSC_TRUE; 1859 b->free_ij = PETSC_TRUE; 1860 b->singlemalloc = PETSC_FALSE; 1861 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1862 b->j = bj; 1863 b->i = bi; 1864 b->diag = bdiag; 1865 b->ilen = 0; 1866 b->imax = 0; 1867 b->row = isrow; 1868 b->col = iscol; 1869 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1870 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1871 b->icol = isicol; 1872 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1873 /* In b structure: Free imax, ilen, old a, old j. 1874 Allocate bdiag, solve_work, new a, new j */ 1875 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1876 b->maxnz = b->nz = bdiag[0]+1; 1877 (fact)->info.factor_mallocs = reallocs; 1878 (fact)->info.fill_ratio_given = f; 1879 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1880 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1881 if (a->inode.size) { 1882 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 #undef __FUNCT__ 1888 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1889 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1890 { 1891 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1892 IS isicol; 1893 PetscErrorCode ierr; 1894 const PetscInt *r,*ic; 1895 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1896 PetscInt *bi,*cols,nnz,*cols_lvl; 1897 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1898 PetscInt i,levels,diagonal_fill; 1899 PetscBool col_identity,row_identity; 1900 PetscReal f; 1901 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1902 PetscBT lnkbt; 1903 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1904 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1905 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1906 PetscBool missing; 1907 1908 PetscFunctionBegin; 1909 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1910 f = info->fill; 1911 levels = (PetscInt)info->levels; 1912 diagonal_fill = (PetscInt)info->diagonal_fill; 1913 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1914 1915 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1916 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1917 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1918 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1919 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1920 if (a->inode.size) { 1921 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1922 } 1923 fact->factortype = MAT_FACTOR_ILU; 1924 (fact)->info.factor_mallocs = 0; 1925 (fact)->info.fill_ratio_given = info->fill; 1926 (fact)->info.fill_ratio_needed = 1.0; 1927 b = (Mat_SeqAIJ*)(fact)->data; 1928 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1929 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1930 b->row = isrow; 1931 b->col = iscol; 1932 b->icol = isicol; 1933 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1934 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1935 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1936 PetscFunctionReturn(0); 1937 } 1938 1939 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1940 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1941 1942 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1943 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1944 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1945 bi[0] = bdiag[0] = 0; 1946 1947 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1948 1949 /* create a linked list for storing column indices of the active row */ 1950 nlnk = n + 1; 1951 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1952 1953 /* initial FreeSpace size is f*(ai[n]+1) */ 1954 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1955 current_space = free_space; 1956 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1957 current_space_lvl = free_space_lvl; 1958 1959 for (i=0; i<n; i++) { 1960 nzi = 0; 1961 /* copy current row into linked list */ 1962 nnz = ai[r[i]+1] - ai[r[i]]; 1963 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1964 cols = aj + ai[r[i]]; 1965 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1966 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1967 nzi += nlnk; 1968 1969 /* make sure diagonal entry is included */ 1970 if (diagonal_fill && lnk[i] == -1) { 1971 fm = n; 1972 while (lnk[fm] < i) fm = lnk[fm]; 1973 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1974 lnk[fm] = i; 1975 lnk_lvl[i] = 0; 1976 nzi++; dcount++; 1977 } 1978 1979 /* add pivot rows into the active row */ 1980 nzbd = 0; 1981 prow = lnk[n]; 1982 while (prow < i) { 1983 nnz = bdiag[prow]; 1984 cols = bj_ptr[prow] + nnz + 1; 1985 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1986 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1987 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1988 nzi += nlnk; 1989 prow = lnk[prow]; 1990 nzbd++; 1991 } 1992 bdiag[i] = nzbd; 1993 bi[i+1] = bi[i] + nzi; 1994 1995 /* if free space is not available, make more free space */ 1996 if (current_space->local_remaining<nzi) { 1997 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1998 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1999 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2000 reallocs++; 2001 } 2002 2003 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2004 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2005 bj_ptr[i] = current_space->array; 2006 bjlvl_ptr[i] = current_space_lvl->array; 2007 2008 /* make sure the active row i has diagonal entry */ 2009 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2010 2011 current_space->array += nzi; 2012 current_space->local_used += nzi; 2013 current_space->local_remaining -= nzi; 2014 current_space_lvl->array += nzi; 2015 current_space_lvl->local_used += nzi; 2016 current_space_lvl->local_remaining -= nzi; 2017 } 2018 2019 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2020 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2021 2022 /* destroy list of free space and other temporary arrays */ 2023 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2024 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2025 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2026 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2027 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2028 2029 #if defined(PETSC_USE_INFO) 2030 { 2031 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2032 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2033 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2034 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2035 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2036 if (diagonal_fill) { 2037 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2038 } 2039 } 2040 #endif 2041 2042 /* put together the new matrix */ 2043 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2044 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2045 b = (Mat_SeqAIJ*)(fact)->data; 2046 b->free_a = PETSC_TRUE; 2047 b->free_ij = PETSC_TRUE; 2048 b->singlemalloc = PETSC_FALSE; 2049 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2050 b->j = bj; 2051 b->i = bi; 2052 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2053 b->diag = bdiag; 2054 b->ilen = 0; 2055 b->imax = 0; 2056 b->row = isrow; 2057 b->col = iscol; 2058 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2059 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2060 b->icol = isicol; 2061 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2062 /* In b structure: Free imax, ilen, old a, old j. 2063 Allocate bdiag, solve_work, new a, new j */ 2064 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2065 b->maxnz = b->nz = bi[n] ; 2066 (fact)->info.factor_mallocs = reallocs; 2067 (fact)->info.fill_ratio_given = f; 2068 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2069 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2070 if (a->inode.size) { 2071 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2072 } 2073 PetscFunctionReturn(0); 2074 } 2075 2076 #undef __FUNCT__ 2077 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2078 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2079 { 2080 Mat C = B; 2081 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2082 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2083 IS ip=b->row,iip = b->icol; 2084 PetscErrorCode ierr; 2085 const PetscInt *rip,*riip; 2086 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2087 PetscInt *ai=a->i,*aj=a->j; 2088 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2089 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2090 PetscBool perm_identity; 2091 FactorShiftCtx sctx; 2092 PetscReal rs; 2093 MatScalar d,*v; 2094 2095 PetscFunctionBegin; 2096 /* MatPivotSetUp(): initialize shift context sctx */ 2097 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2098 2099 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2100 sctx.shift_top = info->zeropivot; 2101 for (i=0; i<mbs; i++) { 2102 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2103 d = (aa)[a->diag[i]]; 2104 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2105 v = aa+ai[i]; 2106 nz = ai[i+1] - ai[i]; 2107 for (j=0; j<nz; j++) 2108 rs += PetscAbsScalar(v[j]); 2109 if (rs>sctx.shift_top) sctx.shift_top = rs; 2110 } 2111 sctx.shift_top *= 1.1; 2112 sctx.nshift_max = 5; 2113 sctx.shift_lo = 0.; 2114 sctx.shift_hi = 1.; 2115 } 2116 2117 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2118 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2119 2120 /* allocate working arrays 2121 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2122 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2123 */ 2124 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2125 2126 do { 2127 sctx.newshift = PETSC_FALSE; 2128 2129 for (i=0; i<mbs; i++) c2r[i] = mbs; 2130 if (mbs) il[0] = 0; 2131 2132 for (k = 0; k<mbs; k++) { 2133 /* zero rtmp */ 2134 nz = bi[k+1] - bi[k]; 2135 bjtmp = bj + bi[k]; 2136 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2137 2138 /* load in initial unfactored row */ 2139 bval = ba + bi[k]; 2140 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2141 for (j = jmin; j < jmax; j++) { 2142 col = riip[aj[j]]; 2143 if (col >= k) { /* only take upper triangular entry */ 2144 rtmp[col] = aa[j]; 2145 *bval++ = 0.0; /* for in-place factorization */ 2146 } 2147 } 2148 /* shift the diagonal of the matrix: ZeropivotApply() */ 2149 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2150 2151 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2152 dk = rtmp[k]; 2153 i = c2r[k]; /* first row to be added to k_th row */ 2154 2155 while (i < k) { 2156 nexti = c2r[i]; /* next row to be added to k_th row */ 2157 2158 /* compute multiplier, update diag(k) and U(i,k) */ 2159 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2160 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2161 dk += uikdi*ba[ili]; /* update diag[k] */ 2162 ba[ili] = uikdi; /* -U(i,k) */ 2163 2164 /* add multiple of row i to k-th row */ 2165 jmin = ili + 1; jmax = bi[i+1]; 2166 if (jmin < jmax) { 2167 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2168 /* update il and c2r for row i */ 2169 il[i] = jmin; 2170 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2171 } 2172 i = nexti; 2173 } 2174 2175 /* copy data into U(k,:) */ 2176 rs = 0.0; 2177 jmin = bi[k]; jmax = bi[k+1]-1; 2178 if (jmin < jmax) { 2179 for (j=jmin; j<jmax; j++) { 2180 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2181 } 2182 /* add the k-th row into il and c2r */ 2183 il[k] = jmin; 2184 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2185 } 2186 2187 /* MatPivotCheck() */ 2188 sctx.rs = rs; 2189 sctx.pv = dk; 2190 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2191 if (sctx.newshift) break; 2192 dk = sctx.pv; 2193 2194 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2195 } 2196 } while (sctx.newshift); 2197 2198 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2199 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2200 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2201 2202 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2203 if (perm_identity) { 2204 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2205 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2206 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2207 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2208 } else { 2209 B->ops->solve = MatSolve_SeqSBAIJ_1; 2210 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2211 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2212 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2213 } 2214 2215 C->assembled = PETSC_TRUE; 2216 C->preallocated = PETSC_TRUE; 2217 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2218 2219 /* MatPivotView() */ 2220 if (sctx.nshift) { 2221 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2222 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2223 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2224 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2225 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2226 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2227 } 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2234 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2235 { 2236 Mat C = B; 2237 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2238 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2239 IS ip=b->row,iip = b->icol; 2240 PetscErrorCode ierr; 2241 const PetscInt *rip,*riip; 2242 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2243 PetscInt *ai=a->i,*aj=a->j; 2244 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2245 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2246 PetscBool perm_identity; 2247 FactorShiftCtx sctx; 2248 PetscReal rs; 2249 MatScalar d,*v; 2250 2251 PetscFunctionBegin; 2252 /* MatPivotSetUp(): initialize shift context sctx */ 2253 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2254 2255 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2256 sctx.shift_top = info->zeropivot; 2257 for (i=0; i<mbs; i++) { 2258 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2259 d = (aa)[a->diag[i]]; 2260 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2261 v = aa+ai[i]; 2262 nz = ai[i+1] - ai[i]; 2263 for (j=0; j<nz; j++) 2264 rs += PetscAbsScalar(v[j]); 2265 if (rs>sctx.shift_top) sctx.shift_top = rs; 2266 } 2267 sctx.shift_top *= 1.1; 2268 sctx.nshift_max = 5; 2269 sctx.shift_lo = 0.; 2270 sctx.shift_hi = 1.; 2271 } 2272 2273 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2274 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2275 2276 /* initialization */ 2277 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2278 2279 do { 2280 sctx.newshift = PETSC_FALSE; 2281 2282 for (i=0; i<mbs; i++) jl[i] = mbs; 2283 il[0] = 0; 2284 2285 for (k = 0; k<mbs; k++) { 2286 /* zero rtmp */ 2287 nz = bi[k+1] - bi[k]; 2288 bjtmp = bj + bi[k]; 2289 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2290 2291 bval = ba + bi[k]; 2292 /* initialize k-th row by the perm[k]-th row of A */ 2293 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2294 for (j = jmin; j < jmax; j++) { 2295 col = riip[aj[j]]; 2296 if (col >= k) { /* only take upper triangular entry */ 2297 rtmp[col] = aa[j]; 2298 *bval++ = 0.0; /* for in-place factorization */ 2299 } 2300 } 2301 /* shift the diagonal of the matrix */ 2302 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2303 2304 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2305 dk = rtmp[k]; 2306 i = jl[k]; /* first row to be added to k_th row */ 2307 2308 while (i < k) { 2309 nexti = jl[i]; /* next row to be added to k_th row */ 2310 2311 /* compute multiplier, update diag(k) and U(i,k) */ 2312 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2313 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2314 dk += uikdi*ba[ili]; 2315 ba[ili] = uikdi; /* -U(i,k) */ 2316 2317 /* add multiple of row i to k-th row */ 2318 jmin = ili + 1; jmax = bi[i+1]; 2319 if (jmin < jmax) { 2320 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2321 /* update il and jl for row i */ 2322 il[i] = jmin; 2323 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2324 } 2325 i = nexti; 2326 } 2327 2328 /* shift the diagonals when zero pivot is detected */ 2329 /* compute rs=sum of abs(off-diagonal) */ 2330 rs = 0.0; 2331 jmin = bi[k]+1; 2332 nz = bi[k+1] - jmin; 2333 bcol = bj + jmin; 2334 for (j=0; j<nz; j++) { 2335 rs += PetscAbsScalar(rtmp[bcol[j]]); 2336 } 2337 2338 sctx.rs = rs; 2339 sctx.pv = dk; 2340 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2341 if (sctx.newshift) break; 2342 dk = sctx.pv; 2343 2344 /* copy data into U(k,:) */ 2345 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2346 jmin = bi[k]+1; jmax = bi[k+1]; 2347 if (jmin < jmax) { 2348 for (j=jmin; j<jmax; j++) { 2349 col = bj[j]; ba[j] = rtmp[col]; 2350 } 2351 /* add the k-th row into il and jl */ 2352 il[k] = jmin; 2353 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2354 } 2355 } 2356 } while (sctx.newshift); 2357 2358 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2359 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2360 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2361 2362 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2363 if (perm_identity) { 2364 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2365 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 } else { 2369 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2370 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2371 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2372 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2373 } 2374 2375 C->assembled = PETSC_TRUE; 2376 C->preallocated = PETSC_TRUE; 2377 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2378 if (sctx.nshift) { 2379 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2380 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2381 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2382 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2383 } 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 2388 /* 2389 icc() under revised new data structure. 2390 Factored arrays bj and ba are stored as 2391 U(0,:),...,U(i,:),U(n-1,:) 2392 2393 ui=fact->i is an array of size n+1, in which 2394 ui+ 2395 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2396 ui[n]: points to U(n-1,n-1)+1 2397 2398 udiag=fact->diag is an array of size n,in which 2399 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2400 2401 U(i,:) contains udiag[i] as its last entry, i.e., 2402 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2403 */ 2404 2405 #undef __FUNCT__ 2406 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2407 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2408 { 2409 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2410 Mat_SeqSBAIJ *b; 2411 PetscErrorCode ierr; 2412 PetscBool perm_identity,missing; 2413 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2414 const PetscInt *rip,*riip; 2415 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2416 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2417 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2418 PetscReal fill=info->fill,levels=info->levels; 2419 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2420 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2421 PetscBT lnkbt; 2422 IS iperm; 2423 2424 PetscFunctionBegin; 2425 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2426 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2427 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2428 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2429 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2430 2431 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2432 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2433 ui[0] = 0; 2434 2435 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2436 if (!levels && perm_identity) { 2437 for (i=0; i<am; i++) { 2438 ncols = ai[i+1] - a->diag[i]; 2439 ui[i+1] = ui[i] + ncols; 2440 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2441 } 2442 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2443 cols = uj; 2444 for (i=0; i<am; i++) { 2445 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2446 ncols = ai[i+1] - a->diag[i] -1; 2447 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2448 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2449 } 2450 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2451 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2452 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2453 2454 /* initialization */ 2455 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2456 2457 /* jl: linked list for storing indices of the pivot rows 2458 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2459 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2460 for (i=0; i<am; i++) { 2461 jl[i] = am; il[i] = 0; 2462 } 2463 2464 /* create and initialize a linked list for storing column indices of the active row k */ 2465 nlnk = am + 1; 2466 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2467 2468 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2469 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2470 current_space = free_space; 2471 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2472 current_space_lvl = free_space_lvl; 2473 2474 for (k=0; k<am; k++) { /* for each active row k */ 2475 /* initialize lnk by the column indices of row rip[k] of A */ 2476 nzk = 0; 2477 ncols = ai[rip[k]+1] - ai[rip[k]]; 2478 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2479 ncols_upper = 0; 2480 for (j=0; j<ncols; j++) { 2481 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2482 if (riip[i] >= k) { /* only take upper triangular entry */ 2483 ajtmp[ncols_upper] = i; 2484 ncols_upper++; 2485 } 2486 } 2487 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2488 nzk += nlnk; 2489 2490 /* update lnk by computing fill-in for each pivot row to be merged in */ 2491 prow = jl[k]; /* 1st pivot row */ 2492 2493 while (prow < k) { 2494 nextprow = jl[prow]; 2495 2496 /* merge prow into k-th row */ 2497 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2498 jmax = ui[prow+1]; 2499 ncols = jmax-jmin; 2500 i = jmin - ui[prow]; 2501 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2502 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2503 j = *(uj - 1); 2504 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2505 nzk += nlnk; 2506 2507 /* update il and jl for prow */ 2508 if (jmin < jmax) { 2509 il[prow] = jmin; 2510 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2511 } 2512 prow = nextprow; 2513 } 2514 2515 /* if free space is not available, make more free space */ 2516 if (current_space->local_remaining<nzk) { 2517 i = am - k + 1; /* num of unfactored rows */ 2518 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2519 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2520 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2521 reallocs++; 2522 } 2523 2524 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2525 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2526 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2527 2528 /* add the k-th row into il and jl */ 2529 if (nzk > 1) { 2530 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2531 jl[k] = jl[i]; jl[i] = k; 2532 il[k] = ui[k] + 1; 2533 } 2534 uj_ptr[k] = current_space->array; 2535 uj_lvl_ptr[k] = current_space_lvl->array; 2536 2537 current_space->array += nzk; 2538 current_space->local_used += nzk; 2539 current_space->local_remaining -= nzk; 2540 2541 current_space_lvl->array += nzk; 2542 current_space_lvl->local_used += nzk; 2543 current_space_lvl->local_remaining -= nzk; 2544 2545 ui[k+1] = ui[k] + nzk; 2546 } 2547 2548 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2549 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2550 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2551 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2552 2553 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2554 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2555 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2556 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2557 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2558 2559 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2560 2561 /* put together the new matrix in MATSEQSBAIJ format */ 2562 b = (Mat_SeqSBAIJ*)(fact)->data; 2563 b->singlemalloc = PETSC_FALSE; 2564 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2565 b->j = uj; 2566 b->i = ui; 2567 b->diag = udiag; 2568 b->free_diag = PETSC_TRUE; 2569 b->ilen = 0; 2570 b->imax = 0; 2571 b->row = perm; 2572 b->col = perm; 2573 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2574 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2575 b->icol = iperm; 2576 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2577 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2578 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2579 b->maxnz = b->nz = ui[am]; 2580 b->free_a = PETSC_TRUE; 2581 b->free_ij = PETSC_TRUE; 2582 2583 fact->info.factor_mallocs = reallocs; 2584 fact->info.fill_ratio_given = fill; 2585 if (ai[am] != 0) { 2586 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2587 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2588 } else { 2589 fact->info.fill_ratio_needed = 0.0; 2590 } 2591 #if defined(PETSC_USE_INFO) 2592 if (ai[am] != 0) { 2593 PetscReal af = fact->info.fill_ratio_needed; 2594 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2595 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2596 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2597 } else { 2598 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2599 } 2600 #endif 2601 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2602 PetscFunctionReturn(0); 2603 } 2604 2605 #undef __FUNCT__ 2606 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2607 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2608 { 2609 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2610 Mat_SeqSBAIJ *b; 2611 PetscErrorCode ierr; 2612 PetscBool perm_identity,missing; 2613 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2614 const PetscInt *rip,*riip; 2615 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2616 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2617 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2618 PetscReal fill=info->fill,levels=info->levels; 2619 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2620 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2621 PetscBT lnkbt; 2622 IS iperm; 2623 2624 PetscFunctionBegin; 2625 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2626 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2627 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2628 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2629 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2630 2631 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2632 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2633 ui[0] = 0; 2634 2635 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2636 if (!levels && perm_identity) { 2637 2638 for (i=0; i<am; i++) { 2639 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2640 udiag[i] = ui[i]; 2641 } 2642 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2643 cols = uj; 2644 for (i=0; i<am; i++) { 2645 aj = a->j + a->diag[i]; 2646 ncols = ui[i+1] - ui[i]; 2647 for (j=0; j<ncols; j++) *cols++ = *aj++; 2648 } 2649 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2650 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2651 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2652 2653 /* initialization */ 2654 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2655 2656 /* jl: linked list for storing indices of the pivot rows 2657 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2658 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2659 for (i=0; i<am; i++) { 2660 jl[i] = am; il[i] = 0; 2661 } 2662 2663 /* create and initialize a linked list for storing column indices of the active row k */ 2664 nlnk = am + 1; 2665 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2666 2667 /* initial FreeSpace size is fill*(ai[am]+1) */ 2668 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2669 current_space = free_space; 2670 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2671 current_space_lvl = free_space_lvl; 2672 2673 for (k=0; k<am; k++) { /* for each active row k */ 2674 /* initialize lnk by the column indices of row rip[k] of A */ 2675 nzk = 0; 2676 ncols = ai[rip[k]+1] - ai[rip[k]]; 2677 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2678 ncols_upper = 0; 2679 for (j=0; j<ncols; j++) { 2680 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2681 if (riip[i] >= k) { /* only take upper triangular entry */ 2682 ajtmp[ncols_upper] = i; 2683 ncols_upper++; 2684 } 2685 } 2686 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2687 nzk += nlnk; 2688 2689 /* update lnk by computing fill-in for each pivot row to be merged in */ 2690 prow = jl[k]; /* 1st pivot row */ 2691 2692 while (prow < k) { 2693 nextprow = jl[prow]; 2694 2695 /* merge prow into k-th row */ 2696 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2697 jmax = ui[prow+1]; 2698 ncols = jmax-jmin; 2699 i = jmin - ui[prow]; 2700 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2701 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2702 j = *(uj - 1); 2703 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2704 nzk += nlnk; 2705 2706 /* update il and jl for prow */ 2707 if (jmin < jmax) { 2708 il[prow] = jmin; 2709 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2710 } 2711 prow = nextprow; 2712 } 2713 2714 /* if free space is not available, make more free space */ 2715 if (current_space->local_remaining<nzk) { 2716 i = am - k + 1; /* num of unfactored rows */ 2717 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2718 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2719 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2720 reallocs++; 2721 } 2722 2723 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2724 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2725 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2726 2727 /* add the k-th row into il and jl */ 2728 if (nzk > 1) { 2729 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2730 jl[k] = jl[i]; jl[i] = k; 2731 il[k] = ui[k] + 1; 2732 } 2733 uj_ptr[k] = current_space->array; 2734 uj_lvl_ptr[k] = current_space_lvl->array; 2735 2736 current_space->array += nzk; 2737 current_space->local_used += nzk; 2738 current_space->local_remaining -= nzk; 2739 2740 current_space_lvl->array += nzk; 2741 current_space_lvl->local_used += nzk; 2742 current_space_lvl->local_remaining -= nzk; 2743 2744 ui[k+1] = ui[k] + nzk; 2745 } 2746 2747 #if defined(PETSC_USE_INFO) 2748 if (ai[am] != 0) { 2749 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2750 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2751 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2752 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2753 } else { 2754 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2755 } 2756 #endif 2757 2758 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2760 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2761 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2762 2763 /* destroy list of free space and other temporary array(s) */ 2764 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2765 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2766 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2767 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2768 2769 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2770 2771 /* put together the new matrix in MATSEQSBAIJ format */ 2772 2773 b = (Mat_SeqSBAIJ*)fact->data; 2774 b->singlemalloc = PETSC_FALSE; 2775 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2776 b->j = uj; 2777 b->i = ui; 2778 b->diag = udiag; 2779 b->free_diag = PETSC_TRUE; 2780 b->ilen = 0; 2781 b->imax = 0; 2782 b->row = perm; 2783 b->col = perm; 2784 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2785 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2786 b->icol = iperm; 2787 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2788 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2789 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2790 b->maxnz = b->nz = ui[am]; 2791 b->free_a = PETSC_TRUE; 2792 b->free_ij = PETSC_TRUE; 2793 2794 fact->info.factor_mallocs = reallocs; 2795 fact->info.fill_ratio_given = fill; 2796 if (ai[am] != 0) { 2797 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2798 } else { 2799 fact->info.fill_ratio_needed = 0.0; 2800 } 2801 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2802 PetscFunctionReturn(0); 2803 } 2804 2805 #undef __FUNCT__ 2806 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2807 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2808 { 2809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2810 Mat_SeqSBAIJ *b; 2811 PetscErrorCode ierr; 2812 PetscBool perm_identity; 2813 PetscReal fill = info->fill; 2814 const PetscInt *rip,*riip; 2815 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2816 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2817 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2818 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2819 PetscBT lnkbt; 2820 IS iperm; 2821 2822 PetscFunctionBegin; 2823 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2824 /* check whether perm is the identity mapping */ 2825 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2826 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2827 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2828 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2829 2830 /* initialization */ 2831 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2832 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2833 ui[0] = 0; 2834 2835 /* jl: linked list for storing indices of the pivot rows 2836 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2837 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2838 for (i=0; i<am; i++) { 2839 jl[i] = am; il[i] = 0; 2840 } 2841 2842 /* create and initialize a linked list for storing column indices of the active row k */ 2843 nlnk = am + 1; 2844 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2845 2846 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2847 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2848 current_space = free_space; 2849 2850 for (k=0; k<am; k++) { /* for each active row k */ 2851 /* initialize lnk by the column indices of row rip[k] of A */ 2852 nzk = 0; 2853 ncols = ai[rip[k]+1] - ai[rip[k]]; 2854 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2855 ncols_upper = 0; 2856 for (j=0; j<ncols; j++) { 2857 i = riip[*(aj + ai[rip[k]] + j)]; 2858 if (i >= k) { /* only take upper triangular entry */ 2859 cols[ncols_upper] = i; 2860 ncols_upper++; 2861 } 2862 } 2863 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2864 nzk += nlnk; 2865 2866 /* update lnk by computing fill-in for each pivot row to be merged in */ 2867 prow = jl[k]; /* 1st pivot row */ 2868 2869 while (prow < k) { 2870 nextprow = jl[prow]; 2871 /* merge prow into k-th row */ 2872 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2873 jmax = ui[prow+1]; 2874 ncols = jmax-jmin; 2875 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2876 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2877 nzk += nlnk; 2878 2879 /* update il and jl for prow */ 2880 if (jmin < jmax) { 2881 il[prow] = jmin; 2882 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2883 } 2884 prow = nextprow; 2885 } 2886 2887 /* if free space is not available, make more free space */ 2888 if (current_space->local_remaining<nzk) { 2889 i = am - k + 1; /* num of unfactored rows */ 2890 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2891 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2892 reallocs++; 2893 } 2894 2895 /* copy data into free space, then initialize lnk */ 2896 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2897 2898 /* add the k-th row into il and jl */ 2899 if (nzk > 1) { 2900 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2901 jl[k] = jl[i]; jl[i] = k; 2902 il[k] = ui[k] + 1; 2903 } 2904 ui_ptr[k] = current_space->array; 2905 current_space->array += nzk; 2906 current_space->local_used += nzk; 2907 current_space->local_remaining -= nzk; 2908 2909 ui[k+1] = ui[k] + nzk; 2910 } 2911 2912 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2913 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2914 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2915 2916 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2917 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2918 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2919 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2920 2921 /* put together the new matrix in MATSEQSBAIJ format */ 2922 2923 b = (Mat_SeqSBAIJ*)fact->data; 2924 b->singlemalloc = PETSC_FALSE; 2925 b->free_a = PETSC_TRUE; 2926 b->free_ij = PETSC_TRUE; 2927 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2928 b->j = uj; 2929 b->i = ui; 2930 b->diag = udiag; 2931 b->free_diag = PETSC_TRUE; 2932 b->ilen = 0; 2933 b->imax = 0; 2934 b->row = perm; 2935 b->col = perm; 2936 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2937 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2938 b->icol = iperm; 2939 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2940 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2941 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2942 b->maxnz = b->nz = ui[am]; 2943 2944 fact->info.factor_mallocs = reallocs; 2945 fact->info.fill_ratio_given = fill; 2946 if (ai[am] != 0) { 2947 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2948 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2949 } else { 2950 fact->info.fill_ratio_needed = 0.0; 2951 } 2952 #if defined(PETSC_USE_INFO) 2953 if (ai[am] != 0) { 2954 PetscReal af = fact->info.fill_ratio_needed; 2955 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2956 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2957 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2958 } else { 2959 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2960 } 2961 #endif 2962 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2963 PetscFunctionReturn(0); 2964 } 2965 2966 #undef __FUNCT__ 2967 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2968 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2969 { 2970 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2971 Mat_SeqSBAIJ *b; 2972 PetscErrorCode ierr; 2973 PetscBool perm_identity; 2974 PetscReal fill = info->fill; 2975 const PetscInt *rip,*riip; 2976 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2977 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2978 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2979 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2980 PetscBT lnkbt; 2981 IS iperm; 2982 2983 PetscFunctionBegin; 2984 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2985 /* check whether perm is the identity mapping */ 2986 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2987 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2988 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2989 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2990 2991 /* initialization */ 2992 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2993 ui[0] = 0; 2994 2995 /* jl: linked list for storing indices of the pivot rows 2996 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2997 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2998 for (i=0; i<am; i++) { 2999 jl[i] = am; il[i] = 0; 3000 } 3001 3002 /* create and initialize a linked list for storing column indices of the active row k */ 3003 nlnk = am + 1; 3004 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3005 3006 /* initial FreeSpace size is fill*(ai[am]+1) */ 3007 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3008 current_space = free_space; 3009 3010 for (k=0; k<am; k++) { /* for each active row k */ 3011 /* initialize lnk by the column indices of row rip[k] of A */ 3012 nzk = 0; 3013 ncols = ai[rip[k]+1] - ai[rip[k]]; 3014 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3015 ncols_upper = 0; 3016 for (j=0; j<ncols; j++) { 3017 i = riip[*(aj + ai[rip[k]] + j)]; 3018 if (i >= k) { /* only take upper triangular entry */ 3019 cols[ncols_upper] = i; 3020 ncols_upper++; 3021 } 3022 } 3023 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3024 nzk += nlnk; 3025 3026 /* update lnk by computing fill-in for each pivot row to be merged in */ 3027 prow = jl[k]; /* 1st pivot row */ 3028 3029 while (prow < k) { 3030 nextprow = jl[prow]; 3031 /* merge prow into k-th row */ 3032 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3033 jmax = ui[prow+1]; 3034 ncols = jmax-jmin; 3035 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3036 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3037 nzk += nlnk; 3038 3039 /* update il and jl for prow */ 3040 if (jmin < jmax) { 3041 il[prow] = jmin; 3042 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3043 } 3044 prow = nextprow; 3045 } 3046 3047 /* if free space is not available, make more free space */ 3048 if (current_space->local_remaining<nzk) { 3049 i = am - k + 1; /* num of unfactored rows */ 3050 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3051 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3052 reallocs++; 3053 } 3054 3055 /* copy data into free space, then initialize lnk */ 3056 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3057 3058 /* add the k-th row into il and jl */ 3059 if (nzk-1 > 0) { 3060 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3061 jl[k] = jl[i]; jl[i] = k; 3062 il[k] = ui[k] + 1; 3063 } 3064 ui_ptr[k] = current_space->array; 3065 current_space->array += nzk; 3066 current_space->local_used += nzk; 3067 current_space->local_remaining -= nzk; 3068 3069 ui[k+1] = ui[k] + nzk; 3070 } 3071 3072 #if defined(PETSC_USE_INFO) 3073 if (ai[am] != 0) { 3074 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3075 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3076 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3077 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3078 } else { 3079 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3080 } 3081 #endif 3082 3083 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3084 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3085 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3086 3087 /* destroy list of free space and other temporary array(s) */ 3088 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3089 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3090 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3091 3092 /* put together the new matrix in MATSEQSBAIJ format */ 3093 3094 b = (Mat_SeqSBAIJ*)fact->data; 3095 b->singlemalloc = PETSC_FALSE; 3096 b->free_a = PETSC_TRUE; 3097 b->free_ij = PETSC_TRUE; 3098 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3099 b->j = uj; 3100 b->i = ui; 3101 b->diag = 0; 3102 b->ilen = 0; 3103 b->imax = 0; 3104 b->row = perm; 3105 b->col = perm; 3106 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3107 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3108 b->icol = iperm; 3109 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3110 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3111 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3112 b->maxnz = b->nz = ui[am]; 3113 3114 fact->info.factor_mallocs = reallocs; 3115 fact->info.fill_ratio_given = fill; 3116 if (ai[am] != 0) { 3117 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3118 } else { 3119 fact->info.fill_ratio_needed = 0.0; 3120 } 3121 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3122 PetscFunctionReturn(0); 3123 } 3124 3125 #undef __FUNCT__ 3126 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3127 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3128 { 3129 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3130 PetscErrorCode ierr; 3131 PetscInt n = A->rmap->n; 3132 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3133 PetscScalar *x,sum; 3134 const PetscScalar *b; 3135 const MatScalar *aa = a->a,*v; 3136 PetscInt i,nz; 3137 3138 PetscFunctionBegin; 3139 if (!n) PetscFunctionReturn(0); 3140 3141 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3142 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3143 3144 /* forward solve the lower triangular */ 3145 x[0] = b[0]; 3146 v = aa; 3147 vi = aj; 3148 for (i=1; i<n; i++) { 3149 nz = ai[i+1] - ai[i]; 3150 sum = b[i]; 3151 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3152 v += nz; 3153 vi += nz; 3154 x[i] = sum; 3155 } 3156 3157 /* backward solve the upper triangular */ 3158 for (i=n-1; i>=0; i--) { 3159 v = aa + adiag[i+1] + 1; 3160 vi = aj + adiag[i+1] + 1; 3161 nz = adiag[i] - adiag[i+1]-1; 3162 sum = x[i]; 3163 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3164 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3165 } 3166 3167 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3168 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3169 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3170 PetscFunctionReturn(0); 3171 } 3172 3173 #undef __FUNCT__ 3174 #define __FUNCT__ "MatSolve_SeqAIJ" 3175 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3176 { 3177 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3178 IS iscol = a->col,isrow = a->row; 3179 PetscErrorCode ierr; 3180 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3181 const PetscInt *rout,*cout,*r,*c; 3182 PetscScalar *x,*tmp,sum; 3183 const PetscScalar *b; 3184 const MatScalar *aa = a->a,*v; 3185 3186 PetscFunctionBegin; 3187 if (!n) PetscFunctionReturn(0); 3188 3189 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3191 tmp = a->solve_work; 3192 3193 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3194 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3195 3196 /* forward solve the lower triangular */ 3197 tmp[0] = b[r[0]]; 3198 v = aa; 3199 vi = aj; 3200 for (i=1; i<n; i++) { 3201 nz = ai[i+1] - ai[i]; 3202 sum = b[r[i]]; 3203 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3204 tmp[i] = sum; 3205 v += nz; vi += nz; 3206 } 3207 3208 /* backward solve the upper triangular */ 3209 for (i=n-1; i>=0; i--) { 3210 v = aa + adiag[i+1]+1; 3211 vi = aj + adiag[i+1]+1; 3212 nz = adiag[i]-adiag[i+1]-1; 3213 sum = tmp[i]; 3214 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3215 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3216 } 3217 3218 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3219 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3220 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3221 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3222 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3223 PetscFunctionReturn(0); 3224 } 3225 3226 #undef __FUNCT__ 3227 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3228 /* 3229 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3230 */ 3231 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3232 { 3233 Mat B = *fact; 3234 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3235 IS isicol; 3236 PetscErrorCode ierr; 3237 const PetscInt *r,*ic; 3238 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3239 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3240 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3241 PetscInt nlnk,*lnk; 3242 PetscBT lnkbt; 3243 PetscBool row_identity,icol_identity; 3244 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3245 const PetscInt *ics; 3246 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3247 PetscReal dt=info->dt,shift=info->shiftamount; 3248 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3249 PetscBool missing; 3250 3251 PetscFunctionBegin; 3252 3253 if (dt == PETSC_DEFAULT) dt = 0.005; 3254 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3255 3256 /* ------- symbolic factorization, can be reused ---------*/ 3257 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3258 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3259 adiag=a->diag; 3260 3261 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3262 3263 /* bdiag is location of diagonal in factor */ 3264 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3265 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3266 3267 /* allocate row pointers bi */ 3268 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3269 3270 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3271 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3272 nnz_max = ai[n]+2*n*dtcount+2; 3273 3274 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3275 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3276 3277 /* put together the new matrix */ 3278 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3279 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3280 b = (Mat_SeqAIJ*)B->data; 3281 b->free_a = PETSC_TRUE; 3282 b->free_ij = PETSC_TRUE; 3283 b->singlemalloc = PETSC_FALSE; 3284 b->a = ba; 3285 b->j = bj; 3286 b->i = bi; 3287 b->diag = bdiag; 3288 b->ilen = 0; 3289 b->imax = 0; 3290 b->row = isrow; 3291 b->col = iscol; 3292 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3293 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3294 b->icol = isicol; 3295 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3296 3297 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3298 b->maxnz = nnz_max; 3299 3300 B->factortype = MAT_FACTOR_ILUDT; 3301 B->info.factor_mallocs = 0; 3302 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3303 CHKMEMQ; 3304 /* ------- end of symbolic factorization ---------*/ 3305 3306 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3307 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3308 ics = ic; 3309 3310 /* linked list for storing column indices of the active row */ 3311 nlnk = n + 1; 3312 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3313 3314 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3315 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3316 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3317 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3318 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3319 3320 bi[0] = 0; 3321 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3322 bdiag_rev[n] = bdiag[0]; 3323 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3324 for (i=0; i<n; i++) { 3325 /* copy initial fill into linked list */ 3326 nzi = ai[r[i]+1] - ai[r[i]]; 3327 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3328 nzi_al = adiag[r[i]] - ai[r[i]]; 3329 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3330 ajtmp = aj + ai[r[i]]; 3331 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3332 3333 /* load in initial (unfactored row) */ 3334 aatmp = a->a + ai[r[i]]; 3335 for (j=0; j<nzi; j++) { 3336 rtmp[ics[*ajtmp++]] = *aatmp++; 3337 } 3338 3339 /* add pivot rows into linked list */ 3340 row = lnk[n]; 3341 while (row < i ) { 3342 nzi_bl = bi[row+1] - bi[row] + 1; 3343 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3344 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3345 nzi += nlnk; 3346 row = lnk[row]; 3347 } 3348 3349 /* copy data from lnk into jtmp, then initialize lnk */ 3350 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3351 3352 /* numerical factorization */ 3353 bjtmp = jtmp; 3354 row = *bjtmp++; /* 1st pivot row */ 3355 while ( row < i ) { 3356 pc = rtmp + row; 3357 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3358 multiplier = (*pc) * (*pv); 3359 *pc = multiplier; 3360 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3361 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3362 pv = ba + bdiag[row+1] + 1; 3363 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3364 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3365 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3366 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3367 } 3368 row = *bjtmp++; 3369 } 3370 3371 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3372 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3373 nzi_bl = 0; j = 0; 3374 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3375 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3376 nzi_bl++; j++; 3377 } 3378 nzi_bu = nzi - nzi_bl -1; 3379 while (j < nzi) { 3380 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3381 j++; 3382 } 3383 3384 bjtmp = bj + bi[i]; 3385 batmp = ba + bi[i]; 3386 /* apply level dropping rule to L part */ 3387 ncut = nzi_al + dtcount; 3388 if (ncut < nzi_bl) { 3389 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3390 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3391 } else { 3392 ncut = nzi_bl; 3393 } 3394 for (j=0; j<ncut; j++) { 3395 bjtmp[j] = jtmp[j]; 3396 batmp[j] = vtmp[j]; 3397 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3398 } 3399 bi[i+1] = bi[i] + ncut; 3400 nzi = ncut + 1; 3401 3402 /* apply level dropping rule to U part */ 3403 ncut = nzi_au + dtcount; 3404 if (ncut < nzi_bu) { 3405 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3406 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3407 } else { 3408 ncut = nzi_bu; 3409 } 3410 nzi += ncut; 3411 3412 /* mark bdiagonal */ 3413 bdiag[i+1] = bdiag[i] - (ncut + 1); 3414 bdiag_rev[n-i-1] = bdiag[i+1]; 3415 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3416 bjtmp = bj + bdiag[i]; 3417 batmp = ba + bdiag[i]; 3418 *bjtmp = i; 3419 *batmp = diag_tmp; /* rtmp[i]; */ 3420 if (*batmp == 0.0) { 3421 *batmp = dt+shift; 3422 /* printf(" row %d add shift %g\n",i,shift); */ 3423 } 3424 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3425 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3426 3427 bjtmp = bj + bdiag[i+1]+1; 3428 batmp = ba + bdiag[i+1]+1; 3429 for (k=0; k<ncut; k++) { 3430 bjtmp[k] = jtmp[nzi_bl+1+k]; 3431 batmp[k] = vtmp[nzi_bl+1+k]; 3432 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3433 } 3434 /* printf("\n"); */ 3435 3436 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3437 /* 3438 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3439 printf(" ----------------------------\n"); 3440 */ 3441 } /* for (i=0; i<n; i++) */ 3442 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3443 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3444 3445 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3446 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3447 3448 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3449 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3450 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3451 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3452 3453 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3454 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3455 3456 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3457 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3458 if (row_identity && icol_identity) { 3459 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3460 } else { 3461 B->ops->solve = MatSolve_SeqAIJ; 3462 } 3463 3464 B->ops->solveadd = 0; 3465 B->ops->solvetranspose = 0; 3466 B->ops->solvetransposeadd = 0; 3467 B->ops->matsolve = 0; 3468 B->assembled = PETSC_TRUE; 3469 B->preallocated = PETSC_TRUE; 3470 PetscFunctionReturn(0); 3471 } 3472 3473 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3474 #undef __FUNCT__ 3475 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3476 /* 3477 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3478 */ 3479 3480 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3481 { 3482 PetscErrorCode ierr; 3483 3484 PetscFunctionBegin; 3485 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 3489 /* 3490 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3491 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3492 */ 3493 #undef __FUNCT__ 3494 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3495 /* 3496 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3497 */ 3498 3499 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3500 { 3501 Mat C=fact; 3502 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3503 IS isrow = b->row,isicol = b->icol; 3504 PetscErrorCode ierr; 3505 const PetscInt *r,*ic,*ics; 3506 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3507 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3508 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3509 PetscReal dt=info->dt,shift=info->shiftamount; 3510 PetscBool row_identity, col_identity; 3511 3512 PetscFunctionBegin; 3513 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3514 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3515 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3516 ics = ic; 3517 3518 for (i=0; i<n; i++) { 3519 /* initialize rtmp array */ 3520 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3521 bjtmp = bj + bi[i]; 3522 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3523 rtmp[i] = 0.0; 3524 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3525 bjtmp = bj + bdiag[i+1] + 1; 3526 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3527 3528 /* load in initial unfactored row of A */ 3529 /* printf("row %d\n",i); */ 3530 nz = ai[r[i]+1] - ai[r[i]]; 3531 ajtmp = aj + ai[r[i]]; 3532 v = aa + ai[r[i]]; 3533 for (j=0; j<nz; j++) { 3534 rtmp[ics[*ajtmp++]] = v[j]; 3535 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3536 } 3537 /* printf("\n"); */ 3538 3539 /* numerical factorization */ 3540 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3541 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3542 k = 0; 3543 while (k < nzl) { 3544 row = *bjtmp++; 3545 /* printf(" prow %d\n",row); */ 3546 pc = rtmp + row; 3547 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3548 multiplier = (*pc) * (*pv); 3549 *pc = multiplier; 3550 if (PetscAbsScalar(multiplier) > dt) { 3551 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3552 pv = b->a + bdiag[row+1] + 1; 3553 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3554 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3555 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3556 } 3557 k++; 3558 } 3559 3560 /* finished row so stick it into b->a */ 3561 /* L-part */ 3562 pv = b->a + bi[i] ; 3563 pj = bj + bi[i] ; 3564 nzl = bi[i+1] - bi[i]; 3565 for (j=0; j<nzl; j++) { 3566 pv[j] = rtmp[pj[j]]; 3567 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3568 } 3569 3570 /* diagonal: invert diagonal entries for simplier triangular solves */ 3571 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3572 b->a[bdiag[i]] = 1.0/rtmp[i]; 3573 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3574 3575 /* U-part */ 3576 pv = b->a + bdiag[i+1] + 1; 3577 pj = bj + bdiag[i+1] + 1; 3578 nzu = bdiag[i] - bdiag[i+1] - 1; 3579 for (j=0; j<nzu; j++) { 3580 pv[j] = rtmp[pj[j]]; 3581 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3582 } 3583 /* printf("\n"); */ 3584 } 3585 3586 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3587 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3588 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3589 3590 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3591 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3592 if (row_identity && col_identity) { 3593 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3594 } else { 3595 C->ops->solve = MatSolve_SeqAIJ; 3596 } 3597 C->ops->solveadd = 0; 3598 C->ops->solvetranspose = 0; 3599 C->ops->solvetransposeadd = 0; 3600 C->ops->matsolve = 0; 3601 C->assembled = PETSC_TRUE; 3602 C->preallocated = PETSC_TRUE; 3603 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3604 PetscFunctionReturn(0); 3605 } 3606