1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); 32 else past = PetscAbsScalar(aa[j]); 33 } 34 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 35 if (past/future > best) { 36 best = past/future; 37 current = i; 38 } 39 } 40 41 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 42 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 43 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 44 order[0] = current; 45 for (i=0; i<n-1; i++) { 46 done[current] = PETSC_TRUE; 47 best = -1; 48 /* loop over all neighbors of current pivot */ 49 for (j=ai[current]; j<ai[current+1]; j++) { 50 jj = aj[j]; 51 if (done[jj]) continue; 52 /* loop over columns of potential next row computing weights for below and above diagonal */ 53 past = future = 0.0; 54 for (k=ai[jj]; k<ai[jj+1]; k++) { 55 kk = aj[k]; 56 if (done[kk]) past += PetscAbsScalar(aa[k]); 57 else if (kk != jj) future += PetscAbsScalar(aa[k]); 58 } 59 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 60 if (past/future > best) { 61 best = past/future; 62 newcurrent = jj; 63 } 64 } 65 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 66 best = -1; 67 for (k=0; k<n; k++) { 68 if (done[k]) continue; 69 future = 0.0; 70 past = 0.0; 71 for (j=ai[k]; j<ai[k+1]; j++) { 72 kk = aj[j]; 73 if (done[kk]) past += PetscAbsScalar(aa[j]); 74 else if (kk != k) future += PetscAbsScalar(aa[j]); 75 } 76 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 77 if (past/future > best) { 78 best = past/future; 79 newcurrent = k; 80 } 81 } 82 } 83 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 84 current = newcurrent; 85 order[i+1] = current; 86 } 87 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 88 *icol = *irow; 89 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 90 ierr = PetscFree(done);CHKERRQ(ierr); 91 ierr = PetscFree(order);CHKERRQ(ierr); 92 PetscFunctionReturn(0); 93 } 94 EXTERN_C_END 95 96 EXTERN_C_BEGIN 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 99 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 100 { 101 PetscFunctionBegin; 102 *flg = PETSC_TRUE; 103 PetscFunctionReturn(0); 104 } 105 EXTERN_C_END 106 107 EXTERN_C_BEGIN 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 110 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 111 { 112 PetscInt n = A->rmap->n; 113 PetscErrorCode ierr; 114 115 PetscFunctionBegin; 116 #if defined(PETSC_USE_COMPLEX) 117 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 118 #endif 119 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 120 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 121 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT) { 122 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 123 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 124 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 125 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 126 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 127 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 128 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 129 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 130 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 131 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 132 (*B)->factortype = ftype; 133 PetscFunctionReturn(0); 134 } 135 EXTERN_C_END 136 137 #undef __FUNCT__ 138 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 139 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 140 { 141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 142 IS isicol; 143 PetscErrorCode ierr; 144 const PetscInt *r,*ic; 145 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 146 PetscInt *bi,*bj,*ajtmp; 147 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 148 PetscReal f; 149 PetscInt nlnk,*lnk,k,**bi_ptr; 150 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 151 PetscBT lnkbt; 152 153 PetscFunctionBegin; 154 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 155 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 156 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 157 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 158 159 /* get new row pointers */ 160 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 161 bi[0] = 0; 162 163 /* bdiag is location of diagonal in factor */ 164 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 165 bdiag[0] = 0; 166 167 /* linked list for storing column indices of the active row */ 168 nlnk = n + 1; 169 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 170 171 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 172 173 /* initial FreeSpace size is f*(ai[n]+1) */ 174 f = info->fill; 175 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 176 current_space = free_space; 177 178 for (i=0; i<n; i++) { 179 /* copy previous fill into linked list */ 180 nzi = 0; 181 nnz = ai[r[i]+1] - ai[r[i]]; 182 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 183 ajtmp = aj + ai[r[i]]; 184 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 185 nzi += nlnk; 186 187 /* add pivot rows into linked list */ 188 row = lnk[n]; 189 while (row < i) { 190 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 191 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 192 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 193 nzi += nlnk; 194 row = lnk[row]; 195 } 196 bi[i+1] = bi[i] + nzi; 197 im[i] = nzi; 198 199 /* mark bdiag */ 200 nzbd = 0; 201 nnz = nzi; 202 k = lnk[n]; 203 while (nnz-- && k < i) { 204 nzbd++; 205 k = lnk[k]; 206 } 207 bdiag[i] = bi[i] + nzbd; 208 209 /* if free space is not available, make more free space */ 210 if (current_space->local_remaining<nzi) { 211 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 212 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 213 reallocs++; 214 } 215 216 /* copy data into free space, then initialize lnk */ 217 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 218 bi_ptr[i] = current_space->array; 219 current_space->array += nzi; 220 current_space->local_used += nzi; 221 current_space->local_remaining -= nzi; 222 } 223 #if defined(PETSC_USE_INFO) 224 if (ai[n] != 0) { 225 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 226 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 227 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 228 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 229 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 230 } else { 231 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 232 } 233 #endif 234 235 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 236 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 237 238 /* destroy list of free space and other temporary array(s) */ 239 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 240 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 241 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 242 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 243 244 /* put together the new matrix */ 245 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 246 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 247 b = (Mat_SeqAIJ*)(B)->data; 248 b->free_a = PETSC_TRUE; 249 b->free_ij = PETSC_TRUE; 250 b->singlemalloc = PETSC_FALSE; 251 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 252 b->j = bj; 253 b->i = bi; 254 b->diag = bdiag; 255 b->ilen = 0; 256 b->imax = 0; 257 b->row = isrow; 258 b->col = iscol; 259 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 260 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 261 b->icol = isicol; 262 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 263 264 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 265 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 266 b->maxnz = b->nz = bi[n] ; 267 268 (B)->factortype = MAT_FACTOR_LU; 269 (B)->info.factor_mallocs = reallocs; 270 (B)->info.fill_ratio_given = f; 271 272 if (ai[n]) { 273 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 274 } else { 275 (B)->info.fill_ratio_needed = 0.0; 276 } 277 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 278 if (a->inode.size) { 279 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 280 } 281 PetscFunctionReturn(0); 282 } 283 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 286 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 287 { 288 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 289 IS isicol; 290 PetscErrorCode ierr; 291 const PetscInt *r,*ic; 292 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 293 PetscInt *bi,*bj,*ajtmp; 294 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 295 PetscReal f; 296 PetscInt nlnk,*lnk,k,**bi_ptr; 297 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 298 PetscBT lnkbt; 299 300 PetscFunctionBegin; 301 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 302 /* 303 PetscBool olddatastruct=PETSC_FALSE; 304 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 305 if (olddatastruct) { 306 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 307 PetscFunctionReturn(0); 308 } 309 */ 310 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 311 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 312 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 313 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 314 315 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 316 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 317 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 318 bi[0] = bdiag[0] = 0; 319 320 /* linked list for storing column indices of the active row */ 321 nlnk = n + 1; 322 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 323 324 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 325 326 /* initial FreeSpace size is f*(ai[n]+1) */ 327 f = info->fill; 328 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 329 current_space = free_space; 330 331 for (i=0; i<n; i++) { 332 /* copy previous fill into linked list */ 333 nzi = 0; 334 nnz = ai[r[i]+1] - ai[r[i]]; 335 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 336 ajtmp = aj + ai[r[i]]; 337 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 338 nzi += nlnk; 339 340 /* add pivot rows into linked list */ 341 row = lnk[n]; 342 while (row < i) { 343 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 344 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 345 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 346 nzi += nlnk; 347 row = lnk[row]; 348 } 349 bi[i+1] = bi[i] + nzi; 350 im[i] = nzi; 351 352 /* mark bdiag */ 353 nzbd = 0; 354 nnz = nzi; 355 k = lnk[n]; 356 while (nnz-- && k < i) { 357 nzbd++; 358 k = lnk[k]; 359 } 360 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 361 362 /* if free space is not available, make more free space */ 363 if (current_space->local_remaining<nzi) { 364 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 365 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 366 reallocs++; 367 } 368 369 /* copy data into free space, then initialize lnk */ 370 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 371 bi_ptr[i] = current_space->array; 372 current_space->array += nzi; 373 current_space->local_used += nzi; 374 current_space->local_remaining -= nzi; 375 } 376 377 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 378 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 379 380 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 381 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 382 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 383 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 384 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 385 386 /* put together the new matrix */ 387 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 388 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 389 b = (Mat_SeqAIJ*)(B)->data; 390 b->free_a = PETSC_TRUE; 391 b->free_ij = PETSC_TRUE; 392 b->singlemalloc = PETSC_FALSE; 393 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 394 b->j = bj; 395 b->i = bi; 396 b->diag = bdiag; 397 b->ilen = 0; 398 b->imax = 0; 399 b->row = isrow; 400 b->col = iscol; 401 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 402 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 403 b->icol = isicol; 404 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 405 406 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 407 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 408 b->maxnz = b->nz = bdiag[0]+1; 409 B->factortype = MAT_FACTOR_LU; 410 B->info.factor_mallocs = reallocs; 411 B->info.fill_ratio_given = f; 412 413 if (ai[n]) { 414 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 415 } else { 416 B->info.fill_ratio_needed = 0.0; 417 } 418 #if defined(PETSC_USE_INFO) 419 if (ai[n] != 0) { 420 PetscReal af = B->info.fill_ratio_needed; 421 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 422 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 423 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 424 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 425 } else { 426 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 427 } 428 #endif 429 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 430 if (a->inode.size) { 431 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 432 } 433 PetscFunctionReturn(0); 434 } 435 436 /* 437 Trouble in factorization, should we dump the original matrix? 438 */ 439 #undef __FUNCT__ 440 #define __FUNCT__ "MatFactorDumpMatrix" 441 PetscErrorCode MatFactorDumpMatrix(Mat A) 442 { 443 PetscErrorCode ierr; 444 PetscBool flg = PETSC_FALSE; 445 446 PetscFunctionBegin; 447 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 448 if (flg) { 449 PetscViewer viewer; 450 char filename[PETSC_MAX_PATH_LEN]; 451 452 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 453 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 454 ierr = MatView(A,viewer);CHKERRQ(ierr); 455 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 456 } 457 PetscFunctionReturn(0); 458 } 459 460 #undef __FUNCT__ 461 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 462 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 463 { 464 Mat C=B; 465 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 466 IS isrow = b->row,isicol = b->icol; 467 PetscErrorCode ierr; 468 const PetscInt *r,*ic,*ics; 469 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 470 PetscInt i,j,k,nz,nzL,row,*pj; 471 const PetscInt *ajtmp,*bjtmp; 472 MatScalar *rtmp,*pc,multiplier,*pv; 473 const MatScalar *aa=a->a,*v; 474 PetscBool row_identity,col_identity; 475 FactorShiftCtx sctx; 476 const PetscInt *ddiag; 477 PetscReal rs; 478 MatScalar d; 479 480 PetscFunctionBegin; 481 /* MatPivotSetUp(): initialize shift context sctx */ 482 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 483 484 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 485 ddiag = a->diag; 486 sctx.shift_top = info->zeropivot; 487 for (i=0; i<n; i++) { 488 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 489 d = (aa)[ddiag[i]]; 490 rs = -PetscAbsScalar(d) - PetscRealPart(d); 491 v = aa+ai[i]; 492 nz = ai[i+1] - ai[i]; 493 for (j=0; j<nz; j++) 494 rs += PetscAbsScalar(v[j]); 495 if (rs>sctx.shift_top) sctx.shift_top = rs; 496 } 497 sctx.shift_top *= 1.1; 498 sctx.nshift_max = 5; 499 sctx.shift_lo = 0.; 500 sctx.shift_hi = 1.; 501 } 502 503 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 504 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 505 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 506 ics = ic; 507 508 do { 509 sctx.newshift = PETSC_FALSE; 510 for (i=0; i<n; i++) { 511 /* zero rtmp */ 512 /* L part */ 513 nz = bi[i+1] - bi[i]; 514 bjtmp = bj + bi[i]; 515 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516 517 /* U part */ 518 nz = bdiag[i]-bdiag[i+1]; 519 bjtmp = bj + bdiag[i+1]+1; 520 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 521 522 /* load in initial (unfactored row) */ 523 nz = ai[r[i]+1] - ai[r[i]]; 524 ajtmp = aj + ai[r[i]]; 525 v = aa + ai[r[i]]; 526 for (j=0; j<nz; j++) { 527 rtmp[ics[ajtmp[j]]] = v[j]; 528 } 529 /* ZeropivotApply() */ 530 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 531 532 /* elimination */ 533 bjtmp = bj + bi[i]; 534 row = *bjtmp++; 535 nzL = bi[i+1] - bi[i]; 536 for (k=0; k < nzL;k++) { 537 pc = rtmp + row; 538 if (*pc != 0.0) { 539 pv = b->a + bdiag[row]; 540 multiplier = *pc * (*pv); 541 *pc = multiplier; 542 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 543 pv = b->a + bdiag[row+1]+1; 544 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 545 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 546 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 547 } 548 row = *bjtmp++; 549 } 550 551 /* finished row so stick it into b->a */ 552 rs = 0.0; 553 /* L part */ 554 pv = b->a + bi[i] ; 555 pj = b->j + bi[i] ; 556 nz = bi[i+1] - bi[i]; 557 for (j=0; j<nz; j++) { 558 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 559 } 560 561 /* U part */ 562 pv = b->a + bdiag[i+1]+1; 563 pj = b->j + bdiag[i+1]+1; 564 nz = bdiag[i] - bdiag[i+1]-1; 565 for (j=0; j<nz; j++) { 566 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 567 } 568 569 sctx.rs = rs; 570 sctx.pv = rtmp[i]; 571 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 572 if (sctx.newshift) break; /* break for-loop */ 573 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 574 575 /* Mark diagonal and invert diagonal for simplier triangular solves */ 576 pv = b->a + bdiag[i]; 577 *pv = 1.0/rtmp[i]; 578 579 } /* endof for (i=0; i<n; i++) { */ 580 581 /* MatPivotRefine() */ 582 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 583 /* 584 * if no shift in this attempt & shifting & started shifting & can refine, 585 * then try lower shift 586 */ 587 sctx.shift_hi = sctx.shift_fraction; 588 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 589 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 590 sctx.newshift = PETSC_TRUE; 591 sctx.nshift++; 592 } 593 } while (sctx.newshift); 594 595 ierr = PetscFree(rtmp);CHKERRQ(ierr); 596 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 597 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 598 599 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 600 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 601 if (row_identity && col_identity) { 602 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 603 } else { 604 C->ops->solve = MatSolve_SeqAIJ; 605 } 606 C->ops->solveadd = MatSolveAdd_SeqAIJ; 607 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 608 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 609 C->ops->matsolve = MatMatSolve_SeqAIJ; 610 C->assembled = PETSC_TRUE; 611 C->preallocated = PETSC_TRUE; 612 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 613 614 /* MatShiftView(A,info,&sctx) */ 615 if (sctx.nshift) { 616 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 617 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 618 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 619 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 620 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 621 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 622 } 623 } 624 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 625 PetscFunctionReturn(0); 626 } 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 630 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 631 { 632 Mat C=B; 633 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 634 IS isrow = b->row,isicol = b->icol; 635 PetscErrorCode ierr; 636 const PetscInt *r,*ic,*ics; 637 PetscInt nz,row,i,j,n=A->rmap->n,diag; 638 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 639 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 640 MatScalar *pv,*rtmp,*pc,multiplier,d; 641 const MatScalar *v,*aa=a->a; 642 PetscReal rs=0.0; 643 FactorShiftCtx sctx; 644 const PetscInt *ddiag; 645 PetscBool row_identity, col_identity; 646 647 PetscFunctionBegin; 648 /* MatPivotSetUp(): initialize shift context sctx */ 649 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 650 651 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 652 ddiag = a->diag; 653 sctx.shift_top = info->zeropivot; 654 for (i=0; i<n; i++) { 655 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 656 d = (aa)[ddiag[i]]; 657 rs = -PetscAbsScalar(d) - PetscRealPart(d); 658 v = aa+ai[i]; 659 nz = ai[i+1] - ai[i]; 660 for (j=0; j<nz; j++) 661 rs += PetscAbsScalar(v[j]); 662 if (rs>sctx.shift_top) sctx.shift_top = rs; 663 } 664 sctx.shift_top *= 1.1; 665 sctx.nshift_max = 5; 666 sctx.shift_lo = 0.; 667 sctx.shift_hi = 1.; 668 } 669 670 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 671 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 672 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 673 ics = ic; 674 675 do { 676 sctx.newshift = PETSC_FALSE; 677 for (i=0; i<n; i++) { 678 nz = bi[i+1] - bi[i]; 679 bjtmp = bj + bi[i]; 680 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 681 682 /* load in initial (unfactored row) */ 683 nz = ai[r[i]+1] - ai[r[i]]; 684 ajtmp = aj + ai[r[i]]; 685 v = aa + ai[r[i]]; 686 for (j=0; j<nz; j++) { 687 rtmp[ics[ajtmp[j]]] = v[j]; 688 } 689 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 690 691 row = *bjtmp++; 692 while (row < i) { 693 pc = rtmp + row; 694 if (*pc != 0.0) { 695 pv = b->a + diag_offset[row]; 696 pj = b->j + diag_offset[row] + 1; 697 multiplier = *pc / *pv++; 698 *pc = multiplier; 699 nz = bi[row+1] - diag_offset[row] - 1; 700 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 701 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 702 } 703 row = *bjtmp++; 704 } 705 /* finished row so stick it into b->a */ 706 pv = b->a + bi[i] ; 707 pj = b->j + bi[i] ; 708 nz = bi[i+1] - bi[i]; 709 diag = diag_offset[i] - bi[i]; 710 rs = 0.0; 711 for (j=0; j<nz; j++) { 712 pv[j] = rtmp[pj[j]]; 713 rs += PetscAbsScalar(pv[j]); 714 } 715 rs -= PetscAbsScalar(pv[diag]); 716 717 sctx.rs = rs; 718 sctx.pv = pv[diag]; 719 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 720 if (sctx.newshift) break; 721 pv[diag] = sctx.pv; 722 } 723 724 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 725 /* 726 * if no shift in this attempt & shifting & started shifting & can refine, 727 * then try lower shift 728 */ 729 sctx.shift_hi = sctx.shift_fraction; 730 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 731 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 732 sctx.newshift = PETSC_TRUE; 733 sctx.nshift++; 734 } 735 } while (sctx.newshift); 736 737 /* invert diagonal entries for simplier triangular solves */ 738 for (i=0; i<n; i++) { 739 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 740 } 741 ierr = PetscFree(rtmp);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 743 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 744 745 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 746 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 747 if (row_identity && col_identity) { 748 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 749 } else { 750 C->ops->solve = MatSolve_SeqAIJ_inplace; 751 } 752 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 753 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 754 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 755 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 756 C->assembled = PETSC_TRUE; 757 C->preallocated = PETSC_TRUE; 758 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 759 if (sctx.nshift) { 760 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 761 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 762 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 763 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 764 } 765 } 766 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 767 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 768 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 769 PetscFunctionReturn(0); 770 } 771 772 /* 773 This routine implements inplace ILU(0) with row or/and column permutations. 774 Input: 775 A - original matrix 776 Output; 777 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 778 a->j (col index) is permuted by the inverse of colperm, then sorted 779 a->a reordered accordingly with a->j 780 a->diag (ptr to diagonal elements) is updated. 781 */ 782 #undef __FUNCT__ 783 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 784 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 785 { 786 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 787 IS isrow = a->row,isicol = a->icol; 788 PetscErrorCode ierr; 789 const PetscInt *r,*ic,*ics; 790 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 791 PetscInt *ajtmp,nz,row; 792 PetscInt *diag = a->diag,nbdiag,*pj; 793 PetscScalar *rtmp,*pc,multiplier,d; 794 MatScalar *pv,*v; 795 PetscReal rs; 796 FactorShiftCtx sctx; 797 const MatScalar *aa=a->a,*vtmp; 798 799 PetscFunctionBegin; 800 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 801 802 /* MatPivotSetUp(): initialize shift context sctx */ 803 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 804 805 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 806 const PetscInt *ddiag = a->diag; 807 sctx.shift_top = info->zeropivot; 808 for (i=0; i<n; i++) { 809 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 810 d = (aa)[ddiag[i]]; 811 rs = -PetscAbsScalar(d) - PetscRealPart(d); 812 vtmp = aa+ai[i]; 813 nz = ai[i+1] - ai[i]; 814 for (j=0; j<nz; j++) 815 rs += PetscAbsScalar(vtmp[j]); 816 if (rs>sctx.shift_top) sctx.shift_top = rs; 817 } 818 sctx.shift_top *= 1.1; 819 sctx.nshift_max = 5; 820 sctx.shift_lo = 0.; 821 sctx.shift_hi = 1.; 822 } 823 824 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 825 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 826 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 827 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 828 ics = ic; 829 830 #if defined(MV) 831 sctx.shift_top = 0.; 832 sctx.nshift_max = 0; 833 sctx.shift_lo = 0.; 834 sctx.shift_hi = 0.; 835 sctx.shift_fraction = 0.; 836 837 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 838 sctx.shift_top = 0.; 839 for (i=0; i<n; i++) { 840 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 841 d = (a->a)[diag[i]]; 842 rs = -PetscAbsScalar(d) - PetscRealPart(d); 843 v = a->a+ai[i]; 844 nz = ai[i+1] - ai[i]; 845 for (j=0; j<nz; j++) 846 rs += PetscAbsScalar(v[j]); 847 if (rs>sctx.shift_top) sctx.shift_top = rs; 848 } 849 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 850 sctx.shift_top *= 1.1; 851 sctx.nshift_max = 5; 852 sctx.shift_lo = 0.; 853 sctx.shift_hi = 1.; 854 } 855 856 sctx.shift_amount = 0.; 857 sctx.nshift = 0; 858 #endif 859 860 do { 861 sctx.newshift = PETSC_FALSE; 862 for (i=0; i<n; i++) { 863 /* load in initial unfactored row */ 864 nz = ai[r[i]+1] - ai[r[i]]; 865 ajtmp = aj + ai[r[i]]; 866 v = a->a + ai[r[i]]; 867 /* sort permuted ajtmp and values v accordingly */ 868 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 869 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 870 871 diag[r[i]] = ai[r[i]]; 872 for (j=0; j<nz; j++) { 873 rtmp[ajtmp[j]] = v[j]; 874 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 875 } 876 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 877 878 row = *ajtmp++; 879 while (row < i) { 880 pc = rtmp + row; 881 if (*pc != 0.0) { 882 pv = a->a + diag[r[row]]; 883 pj = aj + diag[r[row]] + 1; 884 885 multiplier = *pc / *pv++; 886 *pc = multiplier; 887 nz = ai[r[row]+1] - diag[r[row]] - 1; 888 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 889 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 890 } 891 row = *ajtmp++; 892 } 893 /* finished row so overwrite it onto a->a */ 894 pv = a->a + ai[r[i]] ; 895 pj = aj + ai[r[i]] ; 896 nz = ai[r[i]+1] - ai[r[i]]; 897 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 898 899 rs = 0.0; 900 for (j=0; j<nz; j++) { 901 pv[j] = rtmp[pj[j]]; 902 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 903 } 904 905 sctx.rs = rs; 906 sctx.pv = pv[nbdiag]; 907 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 908 if (sctx.newshift) break; 909 pv[nbdiag] = sctx.pv; 910 } 911 912 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 913 /* 914 * if no shift in this attempt & shifting & started shifting & can refine, 915 * then try lower shift 916 */ 917 sctx.shift_hi = sctx.shift_fraction; 918 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 919 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 920 sctx.newshift = PETSC_TRUE; 921 sctx.nshift++; 922 } 923 } while (sctx.newshift); 924 925 /* invert diagonal entries for simplier triangular solves */ 926 for (i=0; i<n; i++) { 927 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 928 } 929 930 ierr = PetscFree(rtmp);CHKERRQ(ierr); 931 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 932 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 933 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 934 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 935 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 936 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 937 A->assembled = PETSC_TRUE; 938 A->preallocated = PETSC_TRUE; 939 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 940 if (sctx.nshift) { 941 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 942 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 943 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 944 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 945 } 946 } 947 PetscFunctionReturn(0); 948 } 949 950 /* ----------------------------------------------------------- */ 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatLUFactor_SeqAIJ" 953 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 954 { 955 PetscErrorCode ierr; 956 Mat C; 957 958 PetscFunctionBegin; 959 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 960 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 961 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 962 A->ops->solve = C->ops->solve; 963 A->ops->solvetranspose = C->ops->solvetranspose; 964 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 965 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 /* ----------------------------------------------------------- */ 969 970 971 #undef __FUNCT__ 972 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 973 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 974 { 975 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 976 IS iscol = a->col,isrow = a->row; 977 PetscErrorCode ierr; 978 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 979 PetscInt nz; 980 const PetscInt *rout,*cout,*r,*c; 981 PetscScalar *x,*tmp,*tmps,sum; 982 const PetscScalar *b; 983 const MatScalar *aa = a->a,*v; 984 985 PetscFunctionBegin; 986 if (!n) PetscFunctionReturn(0); 987 988 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 989 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 990 tmp = a->solve_work; 991 992 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 993 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 994 995 /* forward solve the lower triangular */ 996 tmp[0] = b[*r++]; 997 tmps = tmp; 998 for (i=1; i<n; i++) { 999 v = aa + ai[i] ; 1000 vi = aj + ai[i] ; 1001 nz = a->diag[i] - ai[i]; 1002 sum = b[*r++]; 1003 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1004 tmp[i] = sum; 1005 } 1006 1007 /* backward solve the upper triangular */ 1008 for (i=n-1; i>=0; i--) { 1009 v = aa + a->diag[i] + 1; 1010 vi = aj + a->diag[i] + 1; 1011 nz = ai[i+1] - a->diag[i] - 1; 1012 sum = tmp[i]; 1013 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1014 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1015 } 1016 1017 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1018 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1019 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1020 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1021 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1022 PetscFunctionReturn(0); 1023 } 1024 1025 #undef __FUNCT__ 1026 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1027 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1028 { 1029 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1030 IS iscol = a->col,isrow = a->row; 1031 PetscErrorCode ierr; 1032 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1033 PetscInt nz,neq; 1034 const PetscInt *rout,*cout,*r,*c; 1035 PetscScalar *x,*b,*tmp,*tmps,sum; 1036 const MatScalar *aa = a->a,*v; 1037 PetscBool bisdense,xisdense; 1038 1039 PetscFunctionBegin; 1040 if (!n) PetscFunctionReturn(0); 1041 1042 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1043 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1044 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1045 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1046 1047 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1048 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1049 1050 tmp = a->solve_work; 1051 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1052 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1053 1054 for (neq=0; neq<B->cmap->n; neq++) { 1055 /* forward solve the lower triangular */ 1056 tmp[0] = b[r[0]]; 1057 tmps = tmp; 1058 for (i=1; i<n; i++) { 1059 v = aa + ai[i] ; 1060 vi = aj + ai[i] ; 1061 nz = a->diag[i] - ai[i]; 1062 sum = b[r[i]]; 1063 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1064 tmp[i] = sum; 1065 } 1066 /* backward solve the upper triangular */ 1067 for (i=n-1; i>=0; i--) { 1068 v = aa + a->diag[i] + 1; 1069 vi = aj + a->diag[i] + 1; 1070 nz = ai[i+1] - a->diag[i] - 1; 1071 sum = tmp[i]; 1072 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1073 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1074 } 1075 1076 b += n; 1077 x += n; 1078 } 1079 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1080 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1081 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1082 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1083 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 #undef __FUNCT__ 1088 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1089 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1090 { 1091 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1092 IS iscol = a->col,isrow = a->row; 1093 PetscErrorCode ierr; 1094 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1095 PetscInt nz,neq; 1096 const PetscInt *rout,*cout,*r,*c; 1097 PetscScalar *x,*b,*tmp,sum; 1098 const MatScalar *aa = a->a,*v; 1099 PetscBool bisdense,xisdense; 1100 1101 PetscFunctionBegin; 1102 if (!n) PetscFunctionReturn(0); 1103 1104 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1105 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1106 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1107 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1108 1109 ierr = MatDenseGetArray(B,&b);CHKERRQ(ierr); 1110 ierr = MatDenseGetArray(X,&x);CHKERRQ(ierr); 1111 1112 tmp = a->solve_work; 1113 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1114 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1115 1116 for (neq=0; neq<B->cmap->n; neq++) { 1117 /* forward solve the lower triangular */ 1118 tmp[0] = b[r[0]]; 1119 v = aa; 1120 vi = aj; 1121 for (i=1; i<n; i++) { 1122 nz = ai[i+1] - ai[i]; 1123 sum = b[r[i]]; 1124 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1125 tmp[i] = sum; 1126 v += nz; vi += nz; 1127 } 1128 1129 /* backward solve the upper triangular */ 1130 for (i=n-1; i>=0; i--) { 1131 v = aa + adiag[i+1]+1; 1132 vi = aj + adiag[i+1]+1; 1133 nz = adiag[i]-adiag[i+1]-1; 1134 sum = tmp[i]; 1135 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1136 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1137 } 1138 1139 b += n; 1140 x += n; 1141 } 1142 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1143 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1144 ierr = MatDenseRestoreArray(B,&b);CHKERRQ(ierr); 1145 ierr = MatDenseRestoreArray(X,&x);CHKERRQ(ierr); 1146 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 #undef __FUNCT__ 1151 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1152 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1153 { 1154 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1155 IS iscol = a->col,isrow = a->row; 1156 PetscErrorCode ierr; 1157 const PetscInt *r,*c,*rout,*cout; 1158 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1159 PetscInt nz,row; 1160 PetscScalar *x,*b,*tmp,*tmps,sum; 1161 const MatScalar *aa = a->a,*v; 1162 1163 PetscFunctionBegin; 1164 if (!n) PetscFunctionReturn(0); 1165 1166 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1167 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1168 tmp = a->solve_work; 1169 1170 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1171 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1172 1173 /* forward solve the lower triangular */ 1174 tmp[0] = b[*r++]; 1175 tmps = tmp; 1176 for (row=1; row<n; row++) { 1177 i = rout[row]; /* permuted row */ 1178 v = aa + ai[i] ; 1179 vi = aj + ai[i] ; 1180 nz = a->diag[i] - ai[i]; 1181 sum = b[*r++]; 1182 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1183 tmp[row] = sum; 1184 } 1185 1186 /* backward solve the upper triangular */ 1187 for (row=n-1; row>=0; row--) { 1188 i = rout[row]; /* permuted row */ 1189 v = aa + a->diag[i] + 1; 1190 vi = aj + a->diag[i] + 1; 1191 nz = ai[i+1] - a->diag[i] - 1; 1192 sum = tmp[row]; 1193 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1194 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1195 } 1196 1197 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1198 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1200 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1201 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1202 PetscFunctionReturn(0); 1203 } 1204 1205 /* ----------------------------------------------------------- */ 1206 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1207 #undef __FUNCT__ 1208 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1209 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1210 { 1211 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1212 PetscErrorCode ierr; 1213 PetscInt n = A->rmap->n; 1214 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1215 PetscScalar *x; 1216 const PetscScalar *b; 1217 const MatScalar *aa = a->a; 1218 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1219 PetscInt adiag_i,i,nz,ai_i; 1220 const PetscInt *vi; 1221 const MatScalar *v; 1222 PetscScalar sum; 1223 #endif 1224 1225 PetscFunctionBegin; 1226 if (!n) PetscFunctionReturn(0); 1227 1228 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1229 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1230 1231 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1232 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1233 #else 1234 /* forward solve the lower triangular */ 1235 x[0] = b[0]; 1236 for (i=1; i<n; i++) { 1237 ai_i = ai[i]; 1238 v = aa + ai_i; 1239 vi = aj + ai_i; 1240 nz = adiag[i] - ai_i; 1241 sum = b[i]; 1242 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1243 x[i] = sum; 1244 } 1245 1246 /* backward solve the upper triangular */ 1247 for (i=n-1; i>=0; i--) { 1248 adiag_i = adiag[i]; 1249 v = aa + adiag_i + 1; 1250 vi = aj + adiag_i + 1; 1251 nz = ai[i+1] - adiag_i - 1; 1252 sum = x[i]; 1253 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1254 x[i] = sum*aa[adiag_i]; 1255 } 1256 #endif 1257 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1258 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1259 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 #undef __FUNCT__ 1264 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1265 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1266 { 1267 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1268 IS iscol = a->col,isrow = a->row; 1269 PetscErrorCode ierr; 1270 PetscInt i, n = A->rmap->n,j; 1271 PetscInt nz; 1272 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1273 PetscScalar *x,*tmp,sum; 1274 const PetscScalar *b; 1275 const MatScalar *aa = a->a,*v; 1276 1277 PetscFunctionBegin; 1278 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1279 1280 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1281 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1282 tmp = a->solve_work; 1283 1284 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1285 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1286 1287 /* forward solve the lower triangular */ 1288 tmp[0] = b[*r++]; 1289 for (i=1; i<n; i++) { 1290 v = aa + ai[i] ; 1291 vi = aj + ai[i] ; 1292 nz = a->diag[i] - ai[i]; 1293 sum = b[*r++]; 1294 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1295 tmp[i] = sum; 1296 } 1297 1298 /* backward solve the upper triangular */ 1299 for (i=n-1; i>=0; i--) { 1300 v = aa + a->diag[i] + 1; 1301 vi = aj + a->diag[i] + 1; 1302 nz = ai[i+1] - a->diag[i] - 1; 1303 sum = tmp[i]; 1304 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1305 tmp[i] = sum*aa[a->diag[i]]; 1306 x[*c--] += tmp[i]; 1307 } 1308 1309 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1310 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1311 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1312 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1313 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1320 { 1321 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1322 IS iscol = a->col,isrow = a->row; 1323 PetscErrorCode ierr; 1324 PetscInt i, n = A->rmap->n,j; 1325 PetscInt nz; 1326 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1327 PetscScalar *x,*tmp,sum; 1328 const PetscScalar *b; 1329 const MatScalar *aa = a->a,*v; 1330 1331 PetscFunctionBegin; 1332 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1333 1334 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1336 tmp = a->solve_work; 1337 1338 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1339 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1340 1341 /* forward solve the lower triangular */ 1342 tmp[0] = b[r[0]]; 1343 v = aa; 1344 vi = aj; 1345 for (i=1; i<n; i++) { 1346 nz = ai[i+1] - ai[i]; 1347 sum = b[r[i]]; 1348 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1349 tmp[i] = sum; 1350 v += nz; vi += nz; 1351 } 1352 1353 /* backward solve the upper triangular */ 1354 v = aa + adiag[n-1]; 1355 vi = aj + adiag[n-1]; 1356 for (i=n-1; i>=0; i--) { 1357 nz = adiag[i] - adiag[i+1] - 1; 1358 sum = tmp[i]; 1359 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1360 tmp[i] = sum*v[nz]; 1361 x[c[i]] += tmp[i]; 1362 v += nz+1; vi += nz+1; 1363 } 1364 1365 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1366 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1367 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1369 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1370 PetscFunctionReturn(0); 1371 } 1372 1373 #undef __FUNCT__ 1374 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1375 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1376 { 1377 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1378 IS iscol = a->col,isrow = a->row; 1379 PetscErrorCode ierr; 1380 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1381 PetscInt i,n = A->rmap->n,j; 1382 PetscInt nz; 1383 PetscScalar *x,*tmp,s1; 1384 const MatScalar *aa = a->a,*v; 1385 const PetscScalar *b; 1386 1387 PetscFunctionBegin; 1388 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1389 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1390 tmp = a->solve_work; 1391 1392 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1393 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1394 1395 /* copy the b into temp work space according to permutation */ 1396 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1397 1398 /* forward solve the U^T */ 1399 for (i=0; i<n; i++) { 1400 v = aa + diag[i] ; 1401 vi = aj + diag[i] + 1; 1402 nz = ai[i+1] - diag[i] - 1; 1403 s1 = tmp[i]; 1404 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1405 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1406 tmp[i] = s1; 1407 } 1408 1409 /* backward solve the L^T */ 1410 for (i=n-1; i>=0; i--) { 1411 v = aa + diag[i] - 1 ; 1412 vi = aj + diag[i] - 1 ; 1413 nz = diag[i] - ai[i]; 1414 s1 = tmp[i]; 1415 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1416 } 1417 1418 /* copy tmp into x according to permutation */ 1419 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1420 1421 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1422 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1423 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1424 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1425 1426 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1432 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1433 { 1434 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1435 IS iscol = a->col,isrow = a->row; 1436 PetscErrorCode ierr; 1437 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1438 PetscInt i,n = A->rmap->n,j; 1439 PetscInt nz; 1440 PetscScalar *x,*tmp,s1; 1441 const MatScalar *aa = a->a,*v; 1442 const PetscScalar *b; 1443 1444 PetscFunctionBegin; 1445 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1446 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1447 tmp = a->solve_work; 1448 1449 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1450 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1451 1452 /* copy the b into temp work space according to permutation */ 1453 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1454 1455 /* forward solve the U^T */ 1456 for (i=0; i<n; i++) { 1457 v = aa + adiag[i+1] + 1; 1458 vi = aj + adiag[i+1] + 1; 1459 nz = adiag[i] - adiag[i+1] - 1; 1460 s1 = tmp[i]; 1461 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1462 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1463 tmp[i] = s1; 1464 } 1465 1466 /* backward solve the L^T */ 1467 for (i=n-1; i>=0; i--) { 1468 v = aa + ai[i]; 1469 vi = aj + ai[i]; 1470 nz = ai[i+1] - ai[i]; 1471 s1 = tmp[i]; 1472 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1473 } 1474 1475 /* copy tmp into x according to permutation */ 1476 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1477 1478 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1479 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1480 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1481 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1482 1483 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1484 PetscFunctionReturn(0); 1485 } 1486 1487 #undef __FUNCT__ 1488 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1489 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1490 { 1491 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1492 IS iscol = a->col,isrow = a->row; 1493 PetscErrorCode ierr; 1494 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1495 PetscInt i,n = A->rmap->n,j; 1496 PetscInt nz; 1497 PetscScalar *x,*tmp,s1; 1498 const MatScalar *aa = a->a,*v; 1499 const PetscScalar *b; 1500 1501 PetscFunctionBegin; 1502 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1503 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1504 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1505 tmp = a->solve_work; 1506 1507 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1508 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1509 1510 /* copy the b into temp work space according to permutation */ 1511 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1512 1513 /* forward solve the U^T */ 1514 for (i=0; i<n; i++) { 1515 v = aa + diag[i] ; 1516 vi = aj + diag[i] + 1; 1517 nz = ai[i+1] - diag[i] - 1; 1518 s1 = tmp[i]; 1519 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1520 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1521 tmp[i] = s1; 1522 } 1523 1524 /* backward solve the L^T */ 1525 for (i=n-1; i>=0; i--) { 1526 v = aa + diag[i] - 1 ; 1527 vi = aj + diag[i] - 1 ; 1528 nz = diag[i] - ai[i]; 1529 s1 = tmp[i]; 1530 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1531 } 1532 1533 /* copy tmp into x according to permutation */ 1534 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1535 1536 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1537 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1538 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1539 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1540 1541 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1542 PetscFunctionReturn(0); 1543 } 1544 1545 #undef __FUNCT__ 1546 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1547 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1548 { 1549 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1550 IS iscol = a->col,isrow = a->row; 1551 PetscErrorCode ierr; 1552 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1553 PetscInt i,n = A->rmap->n,j; 1554 PetscInt nz; 1555 PetscScalar *x,*tmp,s1; 1556 const MatScalar *aa = a->a,*v; 1557 const PetscScalar *b; 1558 1559 PetscFunctionBegin; 1560 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1561 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1562 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1563 tmp = a->solve_work; 1564 1565 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1566 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1567 1568 /* copy the b into temp work space according to permutation */ 1569 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1570 1571 /* forward solve the U^T */ 1572 for (i=0; i<n; i++) { 1573 v = aa + adiag[i+1] + 1; 1574 vi = aj + adiag[i+1] + 1; 1575 nz = adiag[i] - adiag[i+1] - 1; 1576 s1 = tmp[i]; 1577 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1578 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1579 tmp[i] = s1; 1580 } 1581 1582 1583 /* backward solve the L^T */ 1584 for (i=n-1; i>=0; i--) { 1585 v = aa + ai[i] ; 1586 vi = aj + ai[i]; 1587 nz = ai[i+1] - ai[i]; 1588 s1 = tmp[i]; 1589 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1590 } 1591 1592 /* copy tmp into x according to permutation */ 1593 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1594 1595 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1596 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1597 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1598 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1599 1600 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1601 PetscFunctionReturn(0); 1602 } 1603 1604 /* ----------------------------------------------------------------*/ 1605 1606 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool); 1607 1608 /* 1609 ilu() under revised new data structure. 1610 Factored arrays bj and ba are stored as 1611 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1612 1613 bi=fact->i is an array of size n+1, in which 1614 bi+ 1615 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1616 bi[n]: points to L(n-1,n-1)+1 1617 1618 bdiag=fact->diag is an array of size n+1,in which 1619 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1620 bdiag[n]: points to entry of U(n-1,0)-1 1621 1622 U(i,:) contains bdiag[i] as its last entry, i.e., 1623 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1624 */ 1625 #undef __FUNCT__ 1626 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1627 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1628 { 1629 1630 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1631 PetscErrorCode ierr; 1632 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1633 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1634 PetscBool missing; 1635 IS isicol; 1636 1637 PetscFunctionBegin; 1638 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1639 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1640 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1641 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1642 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1643 b = (Mat_SeqAIJ*)(fact)->data; 1644 1645 /* allocate matrix arrays for new data structure */ 1646 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1647 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1648 b->singlemalloc = PETSC_TRUE; 1649 if (!b->diag) { 1650 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1651 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1652 } 1653 bdiag = b->diag; 1654 1655 if (n > 0) { 1656 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1657 } 1658 1659 /* set bi and bj with new data structure */ 1660 bi = b->i; 1661 bj = b->j; 1662 1663 /* L part */ 1664 bi[0] = 0; 1665 for (i=0; i<n; i++) { 1666 nz = adiag[i] - ai[i]; 1667 bi[i+1] = bi[i] + nz; 1668 aj = a->j + ai[i]; 1669 for (j=0; j<nz; j++) { 1670 /* *bj = aj[j]; bj++; */ 1671 bj[k++] = aj[j]; 1672 } 1673 } 1674 1675 /* U part */ 1676 bdiag[n] = bi[n]-1; 1677 for (i=n-1; i>=0; i--) { 1678 nz = ai[i+1] - adiag[i] - 1; 1679 aj = a->j + adiag[i] + 1; 1680 for (j=0; j<nz; j++) { 1681 /* *bj = aj[j]; bj++; */ 1682 bj[k++] = aj[j]; 1683 } 1684 /* diag[i] */ 1685 /* *bj = i; bj++; */ 1686 bj[k++] = i; 1687 bdiag[i] = bdiag[i+1] + nz + 1; 1688 } 1689 1690 fact->factortype = MAT_FACTOR_ILU; 1691 fact->info.factor_mallocs = 0; 1692 fact->info.fill_ratio_given = info->fill; 1693 fact->info.fill_ratio_needed = 1.0; 1694 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1695 1696 b = (Mat_SeqAIJ*)(fact)->data; 1697 b->row = isrow; 1698 b->col = iscol; 1699 b->icol = isicol; 1700 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1701 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1702 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1703 PetscFunctionReturn(0); 1704 } 1705 1706 #undef __FUNCT__ 1707 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1708 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1709 { 1710 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1711 IS isicol; 1712 PetscErrorCode ierr; 1713 const PetscInt *r,*ic; 1714 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1715 PetscInt *bi,*cols,nnz,*cols_lvl; 1716 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1717 PetscInt i,levels,diagonal_fill; 1718 PetscBool col_identity,row_identity; 1719 PetscReal f; 1720 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1721 PetscBT lnkbt; 1722 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1723 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1724 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1725 1726 PetscFunctionBegin; 1727 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1728 /* 1729 PetscBool olddatastruct=PETSC_FALSE; 1730 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1731 if (olddatastruct) { 1732 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1733 PetscFunctionReturn(0); 1734 } 1735 */ 1736 1737 levels = (PetscInt)info->levels; 1738 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1739 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1740 if (!levels && row_identity && col_identity) { 1741 /* special case: ilu(0) with natural ordering */ 1742 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1743 if (a->inode.size) { 1744 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1750 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1751 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1752 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1753 1754 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1755 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1756 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1757 bi[0] = bdiag[0] = 0; 1758 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1759 1760 /* create a linked list for storing column indices of the active row */ 1761 nlnk = n + 1; 1762 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1763 1764 /* initial FreeSpace size is f*(ai[n]+1) */ 1765 f = info->fill; 1766 diagonal_fill = (PetscInt)info->diagonal_fill; 1767 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1768 current_space = free_space; 1769 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1770 current_space_lvl = free_space_lvl; 1771 for (i=0; i<n; i++) { 1772 nzi = 0; 1773 /* copy current row into linked list */ 1774 nnz = ai[r[i]+1] - ai[r[i]]; 1775 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1776 cols = aj + ai[r[i]]; 1777 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1778 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1779 nzi += nlnk; 1780 1781 /* make sure diagonal entry is included */ 1782 if (diagonal_fill && lnk[i] == -1) { 1783 fm = n; 1784 while (lnk[fm] < i) fm = lnk[fm]; 1785 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1786 lnk[fm] = i; 1787 lnk_lvl[i] = 0; 1788 nzi++; dcount++; 1789 } 1790 1791 /* add pivot rows into the active row */ 1792 nzbd = 0; 1793 prow = lnk[n]; 1794 while (prow < i) { 1795 nnz = bdiag[prow]; 1796 cols = bj_ptr[prow] + nnz + 1; 1797 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1798 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1799 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1800 nzi += nlnk; 1801 prow = lnk[prow]; 1802 nzbd++; 1803 } 1804 bdiag[i] = nzbd; 1805 bi[i+1] = bi[i] + nzi; 1806 /* if free space is not available, make more free space */ 1807 if (current_space->local_remaining<nzi) { 1808 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1809 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1810 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1811 reallocs++; 1812 } 1813 1814 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1815 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1816 bj_ptr[i] = current_space->array; 1817 bjlvl_ptr[i] = current_space_lvl->array; 1818 1819 /* make sure the active row i has diagonal entry */ 1820 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1821 1822 current_space->array += nzi; 1823 current_space->local_used += nzi; 1824 current_space->local_remaining -= nzi; 1825 current_space_lvl->array += nzi; 1826 current_space_lvl->local_used += nzi; 1827 current_space_lvl->local_remaining -= nzi; 1828 } 1829 1830 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1831 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1832 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1833 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1834 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1835 1836 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1837 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1838 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1839 1840 #if defined(PETSC_USE_INFO) 1841 { 1842 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1843 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1844 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1845 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1846 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1847 if (diagonal_fill) { 1848 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1849 } 1850 } 1851 #endif 1852 /* put together the new matrix */ 1853 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1854 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1855 b = (Mat_SeqAIJ*)(fact)->data; 1856 b->free_a = PETSC_TRUE; 1857 b->free_ij = PETSC_TRUE; 1858 b->singlemalloc = PETSC_FALSE; 1859 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1860 b->j = bj; 1861 b->i = bi; 1862 b->diag = bdiag; 1863 b->ilen = 0; 1864 b->imax = 0; 1865 b->row = isrow; 1866 b->col = iscol; 1867 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1868 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1869 b->icol = isicol; 1870 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1871 /* In b structure: Free imax, ilen, old a, old j. 1872 Allocate bdiag, solve_work, new a, new j */ 1873 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1874 b->maxnz = b->nz = bdiag[0]+1; 1875 (fact)->info.factor_mallocs = reallocs; 1876 (fact)->info.fill_ratio_given = f; 1877 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1878 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1879 if (a->inode.size) { 1880 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1881 } 1882 PetscFunctionReturn(0); 1883 } 1884 1885 #undef __FUNCT__ 1886 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1887 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1888 { 1889 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1890 IS isicol; 1891 PetscErrorCode ierr; 1892 const PetscInt *r,*ic; 1893 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1894 PetscInt *bi,*cols,nnz,*cols_lvl; 1895 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1896 PetscInt i,levels,diagonal_fill; 1897 PetscBool col_identity,row_identity; 1898 PetscReal f; 1899 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1900 PetscBT lnkbt; 1901 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1902 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1903 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1904 PetscBool missing; 1905 1906 PetscFunctionBegin; 1907 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1908 f = info->fill; 1909 levels = (PetscInt)info->levels; 1910 diagonal_fill = (PetscInt)info->diagonal_fill; 1911 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1912 1913 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1914 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1915 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1916 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1917 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1918 if (a->inode.size) { 1919 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1920 } 1921 fact->factortype = MAT_FACTOR_ILU; 1922 (fact)->info.factor_mallocs = 0; 1923 (fact)->info.fill_ratio_given = info->fill; 1924 (fact)->info.fill_ratio_needed = 1.0; 1925 b = (Mat_SeqAIJ*)(fact)->data; 1926 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1927 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1928 b->row = isrow; 1929 b->col = iscol; 1930 b->icol = isicol; 1931 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1932 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1933 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1934 PetscFunctionReturn(0); 1935 } 1936 1937 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1938 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1939 1940 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1941 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1942 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1943 bi[0] = bdiag[0] = 0; 1944 1945 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1946 1947 /* create a linked list for storing column indices of the active row */ 1948 nlnk = n + 1; 1949 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1950 1951 /* initial FreeSpace size is f*(ai[n]+1) */ 1952 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1953 current_space = free_space; 1954 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1955 current_space_lvl = free_space_lvl; 1956 1957 for (i=0; i<n; i++) { 1958 nzi = 0; 1959 /* copy current row into linked list */ 1960 nnz = ai[r[i]+1] - ai[r[i]]; 1961 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1962 cols = aj + ai[r[i]]; 1963 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1964 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1965 nzi += nlnk; 1966 1967 /* make sure diagonal entry is included */ 1968 if (diagonal_fill && lnk[i] == -1) { 1969 fm = n; 1970 while (lnk[fm] < i) fm = lnk[fm]; 1971 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1972 lnk[fm] = i; 1973 lnk_lvl[i] = 0; 1974 nzi++; dcount++; 1975 } 1976 1977 /* add pivot rows into the active row */ 1978 nzbd = 0; 1979 prow = lnk[n]; 1980 while (prow < i) { 1981 nnz = bdiag[prow]; 1982 cols = bj_ptr[prow] + nnz + 1; 1983 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1984 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1985 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1986 nzi += nlnk; 1987 prow = lnk[prow]; 1988 nzbd++; 1989 } 1990 bdiag[i] = nzbd; 1991 bi[i+1] = bi[i] + nzi; 1992 1993 /* if free space is not available, make more free space */ 1994 if (current_space->local_remaining<nzi) { 1995 nnz = nzi*(n - i); /* estimated and max additional space needed */ 1996 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1997 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1998 reallocs++; 1999 } 2000 2001 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2002 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2003 bj_ptr[i] = current_space->array; 2004 bjlvl_ptr[i] = current_space_lvl->array; 2005 2006 /* make sure the active row i has diagonal entry */ 2007 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2008 2009 current_space->array += nzi; 2010 current_space->local_used += nzi; 2011 current_space->local_remaining -= nzi; 2012 current_space_lvl->array += nzi; 2013 current_space_lvl->local_used += nzi; 2014 current_space_lvl->local_remaining -= nzi; 2015 } 2016 2017 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2018 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2019 2020 /* destroy list of free space and other temporary arrays */ 2021 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2022 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2023 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2024 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2025 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2026 2027 #if defined(PETSC_USE_INFO) 2028 { 2029 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2030 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2031 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2032 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2033 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2034 if (diagonal_fill) { 2035 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2036 } 2037 } 2038 #endif 2039 2040 /* put together the new matrix */ 2041 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2042 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2043 b = (Mat_SeqAIJ*)(fact)->data; 2044 b->free_a = PETSC_TRUE; 2045 b->free_ij = PETSC_TRUE; 2046 b->singlemalloc = PETSC_FALSE; 2047 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2048 b->j = bj; 2049 b->i = bi; 2050 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2051 b->diag = bdiag; 2052 b->ilen = 0; 2053 b->imax = 0; 2054 b->row = isrow; 2055 b->col = iscol; 2056 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2057 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2058 b->icol = isicol; 2059 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2060 /* In b structure: Free imax, ilen, old a, old j. 2061 Allocate bdiag, solve_work, new a, new j */ 2062 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2063 b->maxnz = b->nz = bi[n] ; 2064 (fact)->info.factor_mallocs = reallocs; 2065 (fact)->info.fill_ratio_given = f; 2066 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2067 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2068 if (a->inode.size) { 2069 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2070 } 2071 PetscFunctionReturn(0); 2072 } 2073 2074 #undef __FUNCT__ 2075 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2076 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2077 { 2078 Mat C = B; 2079 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2080 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2081 IS ip=b->row,iip = b->icol; 2082 PetscErrorCode ierr; 2083 const PetscInt *rip,*riip; 2084 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2085 PetscInt *ai=a->i,*aj=a->j; 2086 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2087 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2088 PetscBool perm_identity; 2089 FactorShiftCtx sctx; 2090 PetscReal rs; 2091 MatScalar d,*v; 2092 2093 PetscFunctionBegin; 2094 /* MatPivotSetUp(): initialize shift context sctx */ 2095 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2096 2097 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2098 sctx.shift_top = info->zeropivot; 2099 for (i=0; i<mbs; i++) { 2100 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2101 d = (aa)[a->diag[i]]; 2102 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2103 v = aa+ai[i]; 2104 nz = ai[i+1] - ai[i]; 2105 for (j=0; j<nz; j++) 2106 rs += PetscAbsScalar(v[j]); 2107 if (rs>sctx.shift_top) sctx.shift_top = rs; 2108 } 2109 sctx.shift_top *= 1.1; 2110 sctx.nshift_max = 5; 2111 sctx.shift_lo = 0.; 2112 sctx.shift_hi = 1.; 2113 } 2114 2115 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2116 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2117 2118 /* allocate working arrays 2119 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2120 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2121 */ 2122 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2123 2124 do { 2125 sctx.newshift = PETSC_FALSE; 2126 2127 for (i=0; i<mbs; i++) c2r[i] = mbs; 2128 if (mbs) il[0] = 0; 2129 2130 for (k = 0; k<mbs; k++) { 2131 /* zero rtmp */ 2132 nz = bi[k+1] - bi[k]; 2133 bjtmp = bj + bi[k]; 2134 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2135 2136 /* load in initial unfactored row */ 2137 bval = ba + bi[k]; 2138 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2139 for (j = jmin; j < jmax; j++) { 2140 col = riip[aj[j]]; 2141 if (col >= k) { /* only take upper triangular entry */ 2142 rtmp[col] = aa[j]; 2143 *bval++ = 0.0; /* for in-place factorization */ 2144 } 2145 } 2146 /* shift the diagonal of the matrix: ZeropivotApply() */ 2147 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2148 2149 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2150 dk = rtmp[k]; 2151 i = c2r[k]; /* first row to be added to k_th row */ 2152 2153 while (i < k) { 2154 nexti = c2r[i]; /* next row to be added to k_th row */ 2155 2156 /* compute multiplier, update diag(k) and U(i,k) */ 2157 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2158 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2159 dk += uikdi*ba[ili]; /* update diag[k] */ 2160 ba[ili] = uikdi; /* -U(i,k) */ 2161 2162 /* add multiple of row i to k-th row */ 2163 jmin = ili + 1; jmax = bi[i+1]; 2164 if (jmin < jmax) { 2165 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2166 /* update il and c2r for row i */ 2167 il[i] = jmin; 2168 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2169 } 2170 i = nexti; 2171 } 2172 2173 /* copy data into U(k,:) */ 2174 rs = 0.0; 2175 jmin = bi[k]; jmax = bi[k+1]-1; 2176 if (jmin < jmax) { 2177 for (j=jmin; j<jmax; j++) { 2178 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2179 } 2180 /* add the k-th row into il and c2r */ 2181 il[k] = jmin; 2182 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2183 } 2184 2185 /* MatPivotCheck() */ 2186 sctx.rs = rs; 2187 sctx.pv = dk; 2188 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2189 if (sctx.newshift) break; 2190 dk = sctx.pv; 2191 2192 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2193 } 2194 } while (sctx.newshift); 2195 2196 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2197 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2198 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2199 2200 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2201 if (perm_identity) { 2202 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2203 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2204 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2205 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2206 } else { 2207 B->ops->solve = MatSolve_SeqSBAIJ_1; 2208 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2209 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2210 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2211 } 2212 2213 C->assembled = PETSC_TRUE; 2214 C->preallocated = PETSC_TRUE; 2215 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2216 2217 /* MatPivotView() */ 2218 if (sctx.nshift) { 2219 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2220 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2221 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2222 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2223 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) { 2224 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2225 } 2226 } 2227 PetscFunctionReturn(0); 2228 } 2229 2230 #undef __FUNCT__ 2231 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2232 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2233 { 2234 Mat C = B; 2235 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2236 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2237 IS ip=b->row,iip = b->icol; 2238 PetscErrorCode ierr; 2239 const PetscInt *rip,*riip; 2240 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2241 PetscInt *ai=a->i,*aj=a->j; 2242 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2243 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2244 PetscBool perm_identity; 2245 FactorShiftCtx sctx; 2246 PetscReal rs; 2247 MatScalar d,*v; 2248 2249 PetscFunctionBegin; 2250 /* MatPivotSetUp(): initialize shift context sctx */ 2251 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2252 2253 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2254 sctx.shift_top = info->zeropivot; 2255 for (i=0; i<mbs; i++) { 2256 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2257 d = (aa)[a->diag[i]]; 2258 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2259 v = aa+ai[i]; 2260 nz = ai[i+1] - ai[i]; 2261 for (j=0; j<nz; j++) 2262 rs += PetscAbsScalar(v[j]); 2263 if (rs>sctx.shift_top) sctx.shift_top = rs; 2264 } 2265 sctx.shift_top *= 1.1; 2266 sctx.nshift_max = 5; 2267 sctx.shift_lo = 0.; 2268 sctx.shift_hi = 1.; 2269 } 2270 2271 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2272 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2273 2274 /* initialization */ 2275 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2276 2277 do { 2278 sctx.newshift = PETSC_FALSE; 2279 2280 for (i=0; i<mbs; i++) jl[i] = mbs; 2281 il[0] = 0; 2282 2283 for (k = 0; k<mbs; k++) { 2284 /* zero rtmp */ 2285 nz = bi[k+1] - bi[k]; 2286 bjtmp = bj + bi[k]; 2287 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2288 2289 bval = ba + bi[k]; 2290 /* initialize k-th row by the perm[k]-th row of A */ 2291 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2292 for (j = jmin; j < jmax; j++) { 2293 col = riip[aj[j]]; 2294 if (col >= k) { /* only take upper triangular entry */ 2295 rtmp[col] = aa[j]; 2296 *bval++ = 0.0; /* for in-place factorization */ 2297 } 2298 } 2299 /* shift the diagonal of the matrix */ 2300 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2301 2302 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2303 dk = rtmp[k]; 2304 i = jl[k]; /* first row to be added to k_th row */ 2305 2306 while (i < k) { 2307 nexti = jl[i]; /* next row to be added to k_th row */ 2308 2309 /* compute multiplier, update diag(k) and U(i,k) */ 2310 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2311 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2312 dk += uikdi*ba[ili]; 2313 ba[ili] = uikdi; /* -U(i,k) */ 2314 2315 /* add multiple of row i to k-th row */ 2316 jmin = ili + 1; jmax = bi[i+1]; 2317 if (jmin < jmax) { 2318 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2319 /* update il and jl for row i */ 2320 il[i] = jmin; 2321 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2322 } 2323 i = nexti; 2324 } 2325 2326 /* shift the diagonals when zero pivot is detected */ 2327 /* compute rs=sum of abs(off-diagonal) */ 2328 rs = 0.0; 2329 jmin = bi[k]+1; 2330 nz = bi[k+1] - jmin; 2331 bcol = bj + jmin; 2332 for (j=0; j<nz; j++) { 2333 rs += PetscAbsScalar(rtmp[bcol[j]]); 2334 } 2335 2336 sctx.rs = rs; 2337 sctx.pv = dk; 2338 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2339 if (sctx.newshift) break; 2340 dk = sctx.pv; 2341 2342 /* copy data into U(k,:) */ 2343 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2344 jmin = bi[k]+1; jmax = bi[k+1]; 2345 if (jmin < jmax) { 2346 for (j=jmin; j<jmax; j++) { 2347 col = bj[j]; ba[j] = rtmp[col]; 2348 } 2349 /* add the k-th row into il and jl */ 2350 il[k] = jmin; 2351 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2352 } 2353 } 2354 } while (sctx.newshift); 2355 2356 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2357 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2358 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2359 2360 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2361 if (perm_identity) { 2362 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2363 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2364 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2365 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2366 } else { 2367 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2368 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2369 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2370 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2371 } 2372 2373 C->assembled = PETSC_TRUE; 2374 C->preallocated = PETSC_TRUE; 2375 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2376 if (sctx.nshift) { 2377 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2378 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2379 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2380 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2381 } 2382 } 2383 PetscFunctionReturn(0); 2384 } 2385 2386 /* 2387 icc() under revised new data structure. 2388 Factored arrays bj and ba are stored as 2389 U(0,:),...,U(i,:),U(n-1,:) 2390 2391 ui=fact->i is an array of size n+1, in which 2392 ui+ 2393 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2394 ui[n]: points to U(n-1,n-1)+1 2395 2396 udiag=fact->diag is an array of size n,in which 2397 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2398 2399 U(i,:) contains udiag[i] as its last entry, i.e., 2400 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2401 */ 2402 2403 #undef __FUNCT__ 2404 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2405 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2406 { 2407 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2408 Mat_SeqSBAIJ *b; 2409 PetscErrorCode ierr; 2410 PetscBool perm_identity,missing; 2411 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2412 const PetscInt *rip,*riip; 2413 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2414 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2415 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2416 PetscReal fill=info->fill,levels=info->levels; 2417 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2418 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2419 PetscBT lnkbt; 2420 IS iperm; 2421 2422 PetscFunctionBegin; 2423 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2424 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2425 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2426 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2427 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2428 2429 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2430 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2431 ui[0] = 0; 2432 2433 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2434 if (!levels && perm_identity) { 2435 for (i=0; i<am; i++) { 2436 ncols = ai[i+1] - a->diag[i]; 2437 ui[i+1] = ui[i] + ncols; 2438 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2439 } 2440 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2441 cols = uj; 2442 for (i=0; i<am; i++) { 2443 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2444 ncols = ai[i+1] - a->diag[i] -1; 2445 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2446 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2447 } 2448 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2449 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2450 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2451 2452 /* initialization */ 2453 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2454 2455 /* jl: linked list for storing indices of the pivot rows 2456 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2457 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2458 for (i=0; i<am; i++) { 2459 jl[i] = am; il[i] = 0; 2460 } 2461 2462 /* create and initialize a linked list for storing column indices of the active row k */ 2463 nlnk = am + 1; 2464 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2465 2466 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2467 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2468 current_space = free_space; 2469 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2470 current_space_lvl = free_space_lvl; 2471 2472 for (k=0; k<am; k++) { /* for each active row k */ 2473 /* initialize lnk by the column indices of row rip[k] of A */ 2474 nzk = 0; 2475 ncols = ai[rip[k]+1] - ai[rip[k]]; 2476 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2477 ncols_upper = 0; 2478 for (j=0; j<ncols; j++) { 2479 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2480 if (riip[i] >= k) { /* only take upper triangular entry */ 2481 ajtmp[ncols_upper] = i; 2482 ncols_upper++; 2483 } 2484 } 2485 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2486 nzk += nlnk; 2487 2488 /* update lnk by computing fill-in for each pivot row to be merged in */ 2489 prow = jl[k]; /* 1st pivot row */ 2490 2491 while (prow < k) { 2492 nextprow = jl[prow]; 2493 2494 /* merge prow into k-th row */ 2495 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2496 jmax = ui[prow+1]; 2497 ncols = jmax-jmin; 2498 i = jmin - ui[prow]; 2499 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2500 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2501 j = *(uj - 1); 2502 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2503 nzk += nlnk; 2504 2505 /* update il and jl for prow */ 2506 if (jmin < jmax) { 2507 il[prow] = jmin; 2508 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2509 } 2510 prow = nextprow; 2511 } 2512 2513 /* if free space is not available, make more free space */ 2514 if (current_space->local_remaining<nzk) { 2515 i = am - k + 1; /* num of unfactored rows */ 2516 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2517 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2518 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2519 reallocs++; 2520 } 2521 2522 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2523 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2524 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2525 2526 /* add the k-th row into il and jl */ 2527 if (nzk > 1) { 2528 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2529 jl[k] = jl[i]; jl[i] = k; 2530 il[k] = ui[k] + 1; 2531 } 2532 uj_ptr[k] = current_space->array; 2533 uj_lvl_ptr[k] = current_space_lvl->array; 2534 2535 current_space->array += nzk; 2536 current_space->local_used += nzk; 2537 current_space->local_remaining -= nzk; 2538 2539 current_space_lvl->array += nzk; 2540 current_space_lvl->local_used += nzk; 2541 current_space_lvl->local_remaining -= nzk; 2542 2543 ui[k+1] = ui[k] + nzk; 2544 } 2545 2546 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2547 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2548 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2549 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2550 2551 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2552 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2553 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2554 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2555 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2556 2557 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2558 2559 /* put together the new matrix in MATSEQSBAIJ format */ 2560 b = (Mat_SeqSBAIJ*)(fact)->data; 2561 b->singlemalloc = PETSC_FALSE; 2562 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2563 b->j = uj; 2564 b->i = ui; 2565 b->diag = udiag; 2566 b->free_diag = PETSC_TRUE; 2567 b->ilen = 0; 2568 b->imax = 0; 2569 b->row = perm; 2570 b->col = perm; 2571 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2572 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2573 b->icol = iperm; 2574 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2575 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2576 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2577 b->maxnz = b->nz = ui[am]; 2578 b->free_a = PETSC_TRUE; 2579 b->free_ij = PETSC_TRUE; 2580 2581 fact->info.factor_mallocs = reallocs; 2582 fact->info.fill_ratio_given = fill; 2583 if (ai[am] != 0) { 2584 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2585 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2586 } else { 2587 fact->info.fill_ratio_needed = 0.0; 2588 } 2589 #if defined(PETSC_USE_INFO) 2590 if (ai[am] != 0) { 2591 PetscReal af = fact->info.fill_ratio_needed; 2592 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2593 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2594 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2595 } else { 2596 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2597 } 2598 #endif 2599 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2605 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2606 { 2607 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2608 Mat_SeqSBAIJ *b; 2609 PetscErrorCode ierr; 2610 PetscBool perm_identity,missing; 2611 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2612 const PetscInt *rip,*riip; 2613 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2614 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2615 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2616 PetscReal fill=info->fill,levels=info->levels; 2617 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2618 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2619 PetscBT lnkbt; 2620 IS iperm; 2621 2622 PetscFunctionBegin; 2623 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2624 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2625 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2626 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2627 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2628 2629 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2630 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2631 ui[0] = 0; 2632 2633 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2634 if (!levels && perm_identity) { 2635 2636 for (i=0; i<am; i++) { 2637 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2638 udiag[i] = ui[i]; 2639 } 2640 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2641 cols = uj; 2642 for (i=0; i<am; i++) { 2643 aj = a->j + a->diag[i]; 2644 ncols = ui[i+1] - ui[i]; 2645 for (j=0; j<ncols; j++) *cols++ = *aj++; 2646 } 2647 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2648 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2649 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2650 2651 /* initialization */ 2652 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2653 2654 /* jl: linked list for storing indices of the pivot rows 2655 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2656 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2657 for (i=0; i<am; i++) { 2658 jl[i] = am; il[i] = 0; 2659 } 2660 2661 /* create and initialize a linked list for storing column indices of the active row k */ 2662 nlnk = am + 1; 2663 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2664 2665 /* initial FreeSpace size is fill*(ai[am]+1) */ 2666 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2667 current_space = free_space; 2668 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2669 current_space_lvl = free_space_lvl; 2670 2671 for (k=0; k<am; k++) { /* for each active row k */ 2672 /* initialize lnk by the column indices of row rip[k] of A */ 2673 nzk = 0; 2674 ncols = ai[rip[k]+1] - ai[rip[k]]; 2675 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2676 ncols_upper = 0; 2677 for (j=0; j<ncols; j++) { 2678 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2679 if (riip[i] >= k) { /* only take upper triangular entry */ 2680 ajtmp[ncols_upper] = i; 2681 ncols_upper++; 2682 } 2683 } 2684 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2685 nzk += nlnk; 2686 2687 /* update lnk by computing fill-in for each pivot row to be merged in */ 2688 prow = jl[k]; /* 1st pivot row */ 2689 2690 while (prow < k) { 2691 nextprow = jl[prow]; 2692 2693 /* merge prow into k-th row */ 2694 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2695 jmax = ui[prow+1]; 2696 ncols = jmax-jmin; 2697 i = jmin - ui[prow]; 2698 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2699 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2700 j = *(uj - 1); 2701 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2702 nzk += nlnk; 2703 2704 /* update il and jl for prow */ 2705 if (jmin < jmax) { 2706 il[prow] = jmin; 2707 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2708 } 2709 prow = nextprow; 2710 } 2711 2712 /* if free space is not available, make more free space */ 2713 if (current_space->local_remaining<nzk) { 2714 i = am - k + 1; /* num of unfactored rows */ 2715 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2716 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2717 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2718 reallocs++; 2719 } 2720 2721 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2722 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2723 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2724 2725 /* add the k-th row into il and jl */ 2726 if (nzk > 1) { 2727 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2728 jl[k] = jl[i]; jl[i] = k; 2729 il[k] = ui[k] + 1; 2730 } 2731 uj_ptr[k] = current_space->array; 2732 uj_lvl_ptr[k] = current_space_lvl->array; 2733 2734 current_space->array += nzk; 2735 current_space->local_used += nzk; 2736 current_space->local_remaining -= nzk; 2737 2738 current_space_lvl->array += nzk; 2739 current_space_lvl->local_used += nzk; 2740 current_space_lvl->local_remaining -= nzk; 2741 2742 ui[k+1] = ui[k] + nzk; 2743 } 2744 2745 #if defined(PETSC_USE_INFO) 2746 if (ai[am] != 0) { 2747 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2748 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2749 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2750 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2751 } else { 2752 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2753 } 2754 #endif 2755 2756 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2757 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2758 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2759 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2760 2761 /* destroy list of free space and other temporary array(s) */ 2762 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2763 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2764 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2765 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2766 2767 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2768 2769 /* put together the new matrix in MATSEQSBAIJ format */ 2770 2771 b = (Mat_SeqSBAIJ*)fact->data; 2772 b->singlemalloc = PETSC_FALSE; 2773 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2774 b->j = uj; 2775 b->i = ui; 2776 b->diag = udiag; 2777 b->free_diag = PETSC_TRUE; 2778 b->ilen = 0; 2779 b->imax = 0; 2780 b->row = perm; 2781 b->col = perm; 2782 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2783 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2784 b->icol = iperm; 2785 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2786 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2787 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2788 b->maxnz = b->nz = ui[am]; 2789 b->free_a = PETSC_TRUE; 2790 b->free_ij = PETSC_TRUE; 2791 2792 fact->info.factor_mallocs = reallocs; 2793 fact->info.fill_ratio_given = fill; 2794 if (ai[am] != 0) { 2795 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2796 } else { 2797 fact->info.fill_ratio_needed = 0.0; 2798 } 2799 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2800 PetscFunctionReturn(0); 2801 } 2802 2803 #undef __FUNCT__ 2804 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2805 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2806 { 2807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2808 Mat_SeqSBAIJ *b; 2809 PetscErrorCode ierr; 2810 PetscBool perm_identity; 2811 PetscReal fill = info->fill; 2812 const PetscInt *rip,*riip; 2813 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2814 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2815 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2816 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2817 PetscBT lnkbt; 2818 IS iperm; 2819 2820 PetscFunctionBegin; 2821 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2822 /* check whether perm is the identity mapping */ 2823 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2824 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2825 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2826 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2827 2828 /* initialization */ 2829 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2830 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2831 ui[0] = 0; 2832 2833 /* jl: linked list for storing indices of the pivot rows 2834 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2835 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2836 for (i=0; i<am; i++) { 2837 jl[i] = am; il[i] = 0; 2838 } 2839 2840 /* create and initialize a linked list for storing column indices of the active row k */ 2841 nlnk = am + 1; 2842 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2843 2844 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2845 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2846 current_space = free_space; 2847 2848 for (k=0; k<am; k++) { /* for each active row k */ 2849 /* initialize lnk by the column indices of row rip[k] of A */ 2850 nzk = 0; 2851 ncols = ai[rip[k]+1] - ai[rip[k]]; 2852 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2853 ncols_upper = 0; 2854 for (j=0; j<ncols; j++) { 2855 i = riip[*(aj + ai[rip[k]] + j)]; 2856 if (i >= k) { /* only take upper triangular entry */ 2857 cols[ncols_upper] = i; 2858 ncols_upper++; 2859 } 2860 } 2861 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2862 nzk += nlnk; 2863 2864 /* update lnk by computing fill-in for each pivot row to be merged in */ 2865 prow = jl[k]; /* 1st pivot row */ 2866 2867 while (prow < k) { 2868 nextprow = jl[prow]; 2869 /* merge prow into k-th row */ 2870 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2871 jmax = ui[prow+1]; 2872 ncols = jmax-jmin; 2873 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2874 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2875 nzk += nlnk; 2876 2877 /* update il and jl for prow */ 2878 if (jmin < jmax) { 2879 il[prow] = jmin; 2880 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2881 } 2882 prow = nextprow; 2883 } 2884 2885 /* if free space is not available, make more free space */ 2886 if (current_space->local_remaining<nzk) { 2887 i = am - k + 1; /* num of unfactored rows */ 2888 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2889 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2890 reallocs++; 2891 } 2892 2893 /* copy data into free space, then initialize lnk */ 2894 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2895 2896 /* add the k-th row into il and jl */ 2897 if (nzk > 1) { 2898 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2899 jl[k] = jl[i]; jl[i] = k; 2900 il[k] = ui[k] + 1; 2901 } 2902 ui_ptr[k] = current_space->array; 2903 current_space->array += nzk; 2904 current_space->local_used += nzk; 2905 current_space->local_remaining -= nzk; 2906 2907 ui[k+1] = ui[k] + nzk; 2908 } 2909 2910 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2911 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2912 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2913 2914 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2915 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2916 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2917 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2918 2919 /* put together the new matrix in MATSEQSBAIJ format */ 2920 2921 b = (Mat_SeqSBAIJ*)fact->data; 2922 b->singlemalloc = PETSC_FALSE; 2923 b->free_a = PETSC_TRUE; 2924 b->free_ij = PETSC_TRUE; 2925 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2926 b->j = uj; 2927 b->i = ui; 2928 b->diag = udiag; 2929 b->free_diag = PETSC_TRUE; 2930 b->ilen = 0; 2931 b->imax = 0; 2932 b->row = perm; 2933 b->col = perm; 2934 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2935 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2936 b->icol = iperm; 2937 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2938 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2939 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2940 b->maxnz = b->nz = ui[am]; 2941 2942 fact->info.factor_mallocs = reallocs; 2943 fact->info.fill_ratio_given = fill; 2944 if (ai[am] != 0) { 2945 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2946 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2947 } else { 2948 fact->info.fill_ratio_needed = 0.0; 2949 } 2950 #if defined(PETSC_USE_INFO) 2951 if (ai[am] != 0) { 2952 PetscReal af = fact->info.fill_ratio_needed; 2953 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2954 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2955 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2956 } else { 2957 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2958 } 2959 #endif 2960 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2961 PetscFunctionReturn(0); 2962 } 2963 2964 #undef __FUNCT__ 2965 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2966 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2967 { 2968 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2969 Mat_SeqSBAIJ *b; 2970 PetscErrorCode ierr; 2971 PetscBool perm_identity; 2972 PetscReal fill = info->fill; 2973 const PetscInt *rip,*riip; 2974 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2975 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2976 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2977 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2978 PetscBT lnkbt; 2979 IS iperm; 2980 2981 PetscFunctionBegin; 2982 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2983 /* check whether perm is the identity mapping */ 2984 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2985 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2986 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2987 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2988 2989 /* initialization */ 2990 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2991 ui[0] = 0; 2992 2993 /* jl: linked list for storing indices of the pivot rows 2994 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2995 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2996 for (i=0; i<am; i++) { 2997 jl[i] = am; il[i] = 0; 2998 } 2999 3000 /* create and initialize a linked list for storing column indices of the active row k */ 3001 nlnk = am + 1; 3002 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3003 3004 /* initial FreeSpace size is fill*(ai[am]+1) */ 3005 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3006 current_space = free_space; 3007 3008 for (k=0; k<am; k++) { /* for each active row k */ 3009 /* initialize lnk by the column indices of row rip[k] of A */ 3010 nzk = 0; 3011 ncols = ai[rip[k]+1] - ai[rip[k]]; 3012 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3013 ncols_upper = 0; 3014 for (j=0; j<ncols; j++) { 3015 i = riip[*(aj + ai[rip[k]] + j)]; 3016 if (i >= k) { /* only take upper triangular entry */ 3017 cols[ncols_upper] = i; 3018 ncols_upper++; 3019 } 3020 } 3021 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3022 nzk += nlnk; 3023 3024 /* update lnk by computing fill-in for each pivot row to be merged in */ 3025 prow = jl[k]; /* 1st pivot row */ 3026 3027 while (prow < k) { 3028 nextprow = jl[prow]; 3029 /* merge prow into k-th row */ 3030 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3031 jmax = ui[prow+1]; 3032 ncols = jmax-jmin; 3033 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3034 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3035 nzk += nlnk; 3036 3037 /* update il and jl for prow */ 3038 if (jmin < jmax) { 3039 il[prow] = jmin; 3040 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3041 } 3042 prow = nextprow; 3043 } 3044 3045 /* if free space is not available, make more free space */ 3046 if (current_space->local_remaining<nzk) { 3047 i = am - k + 1; /* num of unfactored rows */ 3048 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3049 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3050 reallocs++; 3051 } 3052 3053 /* copy data into free space, then initialize lnk */ 3054 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3055 3056 /* add the k-th row into il and jl */ 3057 if (nzk-1 > 0) { 3058 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3059 jl[k] = jl[i]; jl[i] = k; 3060 il[k] = ui[k] + 1; 3061 } 3062 ui_ptr[k] = current_space->array; 3063 current_space->array += nzk; 3064 current_space->local_used += nzk; 3065 current_space->local_remaining -= nzk; 3066 3067 ui[k+1] = ui[k] + nzk; 3068 } 3069 3070 #if defined(PETSC_USE_INFO) 3071 if (ai[am] != 0) { 3072 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3073 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3074 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3075 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3076 } else { 3077 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3078 } 3079 #endif 3080 3081 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3082 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3083 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3084 3085 /* destroy list of free space and other temporary array(s) */ 3086 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3087 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3088 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3089 3090 /* put together the new matrix in MATSEQSBAIJ format */ 3091 3092 b = (Mat_SeqSBAIJ*)fact->data; 3093 b->singlemalloc = PETSC_FALSE; 3094 b->free_a = PETSC_TRUE; 3095 b->free_ij = PETSC_TRUE; 3096 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3097 b->j = uj; 3098 b->i = ui; 3099 b->diag = 0; 3100 b->ilen = 0; 3101 b->imax = 0; 3102 b->row = perm; 3103 b->col = perm; 3104 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3105 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3106 b->icol = iperm; 3107 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3108 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3109 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3110 b->maxnz = b->nz = ui[am]; 3111 3112 fact->info.factor_mallocs = reallocs; 3113 fact->info.fill_ratio_given = fill; 3114 if (ai[am] != 0) { 3115 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3116 } else { 3117 fact->info.fill_ratio_needed = 0.0; 3118 } 3119 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3120 PetscFunctionReturn(0); 3121 } 3122 3123 #undef __FUNCT__ 3124 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3125 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3126 { 3127 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3128 PetscErrorCode ierr; 3129 PetscInt n = A->rmap->n; 3130 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3131 PetscScalar *x,sum; 3132 const PetscScalar *b; 3133 const MatScalar *aa = a->a,*v; 3134 PetscInt i,nz; 3135 3136 PetscFunctionBegin; 3137 if (!n) PetscFunctionReturn(0); 3138 3139 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3140 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3141 3142 /* forward solve the lower triangular */ 3143 x[0] = b[0]; 3144 v = aa; 3145 vi = aj; 3146 for (i=1; i<n; i++) { 3147 nz = ai[i+1] - ai[i]; 3148 sum = b[i]; 3149 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3150 v += nz; 3151 vi += nz; 3152 x[i] = sum; 3153 } 3154 3155 /* backward solve the upper triangular */ 3156 for (i=n-1; i>=0; i--) { 3157 v = aa + adiag[i+1] + 1; 3158 vi = aj + adiag[i+1] + 1; 3159 nz = adiag[i] - adiag[i+1]-1; 3160 sum = x[i]; 3161 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3162 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3163 } 3164 3165 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3166 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3167 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3168 PetscFunctionReturn(0); 3169 } 3170 3171 #undef __FUNCT__ 3172 #define __FUNCT__ "MatSolve_SeqAIJ" 3173 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3174 { 3175 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3176 IS iscol = a->col,isrow = a->row; 3177 PetscErrorCode ierr; 3178 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3179 const PetscInt *rout,*cout,*r,*c; 3180 PetscScalar *x,*tmp,sum; 3181 const PetscScalar *b; 3182 const MatScalar *aa = a->a,*v; 3183 3184 PetscFunctionBegin; 3185 if (!n) PetscFunctionReturn(0); 3186 3187 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3188 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3189 tmp = a->solve_work; 3190 3191 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3192 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3193 3194 /* forward solve the lower triangular */ 3195 tmp[0] = b[r[0]]; 3196 v = aa; 3197 vi = aj; 3198 for (i=1; i<n; i++) { 3199 nz = ai[i+1] - ai[i]; 3200 sum = b[r[i]]; 3201 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3202 tmp[i] = sum; 3203 v += nz; vi += nz; 3204 } 3205 3206 /* backward solve the upper triangular */ 3207 for (i=n-1; i>=0; i--) { 3208 v = aa + adiag[i+1]+1; 3209 vi = aj + adiag[i+1]+1; 3210 nz = adiag[i]-adiag[i+1]-1; 3211 sum = tmp[i]; 3212 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3213 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3214 } 3215 3216 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3217 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3218 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3219 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3220 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3221 PetscFunctionReturn(0); 3222 } 3223 3224 #undef __FUNCT__ 3225 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3226 /* 3227 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3228 */ 3229 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3230 { 3231 Mat B = *fact; 3232 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3233 IS isicol; 3234 PetscErrorCode ierr; 3235 const PetscInt *r,*ic; 3236 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3237 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3238 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3239 PetscInt nlnk,*lnk; 3240 PetscBT lnkbt; 3241 PetscBool row_identity,icol_identity; 3242 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3243 const PetscInt *ics; 3244 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3245 PetscReal dt=info->dt,shift=info->shiftamount; 3246 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3247 PetscBool missing; 3248 3249 PetscFunctionBegin; 3250 if (dt == PETSC_DEFAULT) dt = 0.005; 3251 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3252 3253 /* ------- symbolic factorization, can be reused ---------*/ 3254 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3255 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3256 adiag=a->diag; 3257 3258 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3259 3260 /* bdiag is location of diagonal in factor */ 3261 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3262 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3263 3264 /* allocate row pointers bi */ 3265 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3266 3267 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3268 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3269 nnz_max = ai[n]+2*n*dtcount+2; 3270 3271 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3272 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3273 3274 /* put together the new matrix */ 3275 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3276 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3277 b = (Mat_SeqAIJ*)B->data; 3278 b->free_a = PETSC_TRUE; 3279 b->free_ij = PETSC_TRUE; 3280 b->singlemalloc = PETSC_FALSE; 3281 b->a = ba; 3282 b->j = bj; 3283 b->i = bi; 3284 b->diag = bdiag; 3285 b->ilen = 0; 3286 b->imax = 0; 3287 b->row = isrow; 3288 b->col = iscol; 3289 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3290 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3291 b->icol = isicol; 3292 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3293 3294 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3295 b->maxnz = nnz_max; 3296 3297 B->factortype = MAT_FACTOR_ILUDT; 3298 B->info.factor_mallocs = 0; 3299 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3300 CHKMEMQ; 3301 /* ------- end of symbolic factorization ---------*/ 3302 3303 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3304 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3305 ics = ic; 3306 3307 /* linked list for storing column indices of the active row */ 3308 nlnk = n + 1; 3309 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3310 3311 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3312 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3313 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3314 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3315 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3316 3317 bi[0] = 0; 3318 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3319 bdiag_rev[n] = bdiag[0]; 3320 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3321 for (i=0; i<n; i++) { 3322 /* copy initial fill into linked list */ 3323 nzi = ai[r[i]+1] - ai[r[i]]; 3324 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3325 nzi_al = adiag[r[i]] - ai[r[i]]; 3326 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3327 ajtmp = aj + ai[r[i]]; 3328 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3329 3330 /* load in initial (unfactored row) */ 3331 aatmp = a->a + ai[r[i]]; 3332 for (j=0; j<nzi; j++) { 3333 rtmp[ics[*ajtmp++]] = *aatmp++; 3334 } 3335 3336 /* add pivot rows into linked list */ 3337 row = lnk[n]; 3338 while (row < i) { 3339 nzi_bl = bi[row+1] - bi[row] + 1; 3340 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3341 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3342 nzi += nlnk; 3343 row = lnk[row]; 3344 } 3345 3346 /* copy data from lnk into jtmp, then initialize lnk */ 3347 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3348 3349 /* numerical factorization */ 3350 bjtmp = jtmp; 3351 row = *bjtmp++; /* 1st pivot row */ 3352 while (row < i) { 3353 pc = rtmp + row; 3354 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3355 multiplier = (*pc) * (*pv); 3356 *pc = multiplier; 3357 if (PetscAbsScalar(*pc) > dt) { /* apply tolerance dropping rule */ 3358 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3359 pv = ba + bdiag[row+1] + 1; 3360 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3361 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3362 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3363 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3364 } 3365 row = *bjtmp++; 3366 } 3367 3368 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3369 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3370 nzi_bl = 0; j = 0; 3371 while (jtmp[j] < i) { /* Note: jtmp is sorted */ 3372 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3373 nzi_bl++; j++; 3374 } 3375 nzi_bu = nzi - nzi_bl -1; 3376 while (j < nzi) { 3377 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3378 j++; 3379 } 3380 3381 bjtmp = bj + bi[i]; 3382 batmp = ba + bi[i]; 3383 /* apply level dropping rule to L part */ 3384 ncut = nzi_al + dtcount; 3385 if (ncut < nzi_bl) { 3386 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3387 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3388 } else { 3389 ncut = nzi_bl; 3390 } 3391 for (j=0; j<ncut; j++) { 3392 bjtmp[j] = jtmp[j]; 3393 batmp[j] = vtmp[j]; 3394 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3395 } 3396 bi[i+1] = bi[i] + ncut; 3397 nzi = ncut + 1; 3398 3399 /* apply level dropping rule to U part */ 3400 ncut = nzi_au + dtcount; 3401 if (ncut < nzi_bu) { 3402 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3403 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3404 } else { 3405 ncut = nzi_bu; 3406 } 3407 nzi += ncut; 3408 3409 /* mark bdiagonal */ 3410 bdiag[i+1] = bdiag[i] - (ncut + 1); 3411 bdiag_rev[n-i-1] = bdiag[i+1]; 3412 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3413 bjtmp = bj + bdiag[i]; 3414 batmp = ba + bdiag[i]; 3415 *bjtmp = i; 3416 *batmp = diag_tmp; /* rtmp[i]; */ 3417 if (*batmp == 0.0) { 3418 *batmp = dt+shift; 3419 /* printf(" row %d add shift %g\n",i,shift); */ 3420 } 3421 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3422 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3423 3424 bjtmp = bj + bdiag[i+1]+1; 3425 batmp = ba + bdiag[i+1]+1; 3426 for (k=0; k<ncut; k++) { 3427 bjtmp[k] = jtmp[nzi_bl+1+k]; 3428 batmp[k] = vtmp[nzi_bl+1+k]; 3429 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3430 } 3431 /* printf("\n"); */ 3432 3433 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3434 /* 3435 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3436 printf(" ----------------------------\n"); 3437 */ 3438 } /* for (i=0; i<n; i++) */ 3439 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3440 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3441 3442 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3443 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3444 3445 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3446 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3447 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3448 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3449 3450 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3451 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3452 3453 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3454 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3455 if (row_identity && icol_identity) { 3456 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3457 } else { 3458 B->ops->solve = MatSolve_SeqAIJ; 3459 } 3460 3461 B->ops->solveadd = 0; 3462 B->ops->solvetranspose = 0; 3463 B->ops->solvetransposeadd = 0; 3464 B->ops->matsolve = 0; 3465 B->assembled = PETSC_TRUE; 3466 B->preallocated = PETSC_TRUE; 3467 PetscFunctionReturn(0); 3468 } 3469 3470 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3471 #undef __FUNCT__ 3472 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3473 /* 3474 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3475 */ 3476 3477 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3478 { 3479 PetscErrorCode ierr; 3480 3481 PetscFunctionBegin; 3482 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3483 PetscFunctionReturn(0); 3484 } 3485 3486 /* 3487 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3488 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3489 */ 3490 #undef __FUNCT__ 3491 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3492 /* 3493 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3494 */ 3495 3496 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3497 { 3498 Mat C=fact; 3499 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3500 IS isrow = b->row,isicol = b->icol; 3501 PetscErrorCode ierr; 3502 const PetscInt *r,*ic,*ics; 3503 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3504 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3505 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3506 PetscReal dt=info->dt,shift=info->shiftamount; 3507 PetscBool row_identity, col_identity; 3508 3509 PetscFunctionBegin; 3510 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3511 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3512 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3513 ics = ic; 3514 3515 for (i=0; i<n; i++) { 3516 /* initialize rtmp array */ 3517 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3518 bjtmp = bj + bi[i]; 3519 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3520 rtmp[i] = 0.0; 3521 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3522 bjtmp = bj + bdiag[i+1] + 1; 3523 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3524 3525 /* load in initial unfactored row of A */ 3526 /* printf("row %d\n",i); */ 3527 nz = ai[r[i]+1] - ai[r[i]]; 3528 ajtmp = aj + ai[r[i]]; 3529 v = aa + ai[r[i]]; 3530 for (j=0; j<nz; j++) { 3531 rtmp[ics[*ajtmp++]] = v[j]; 3532 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3533 } 3534 /* printf("\n"); */ 3535 3536 /* numerical factorization */ 3537 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3538 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3539 k = 0; 3540 while (k < nzl) { 3541 row = *bjtmp++; 3542 /* printf(" prow %d\n",row); */ 3543 pc = rtmp + row; 3544 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3545 multiplier = (*pc) * (*pv); 3546 *pc = multiplier; 3547 if (PetscAbsScalar(multiplier) > dt) { 3548 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3549 pv = b->a + bdiag[row+1] + 1; 3550 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3551 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3552 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3553 } 3554 k++; 3555 } 3556 3557 /* finished row so stick it into b->a */ 3558 /* L-part */ 3559 pv = b->a + bi[i] ; 3560 pj = bj + bi[i] ; 3561 nzl = bi[i+1] - bi[i]; 3562 for (j=0; j<nzl; j++) { 3563 pv[j] = rtmp[pj[j]]; 3564 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3565 } 3566 3567 /* diagonal: invert diagonal entries for simplier triangular solves */ 3568 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3569 b->a[bdiag[i]] = 1.0/rtmp[i]; 3570 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3571 3572 /* U-part */ 3573 pv = b->a + bdiag[i+1] + 1; 3574 pj = bj + bdiag[i+1] + 1; 3575 nzu = bdiag[i] - bdiag[i+1] - 1; 3576 for (j=0; j<nzu; j++) { 3577 pv[j] = rtmp[pj[j]]; 3578 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3579 } 3580 /* printf("\n"); */ 3581 } 3582 3583 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3584 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3585 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3586 3587 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3588 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3589 if (row_identity && col_identity) { 3590 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3591 } else { 3592 C->ops->solve = MatSolve_SeqAIJ; 3593 } 3594 C->ops->solveadd = 0; 3595 C->ops->solvetranspose = 0; 3596 C->ops->solvetransposeadd = 0; 3597 C->ops->matsolve = 0; 3598 C->assembled = PETSC_TRUE; 3599 C->preallocated = PETSC_TRUE; 3600 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3601 PetscFunctionReturn(0); 3602 } 3603