1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 #if defined(PETSC_USE_COMPLEX) 116 if (A->hermitian && (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian Factor is not supported"); 117 #endif 118 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 119 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 120 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 121 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 122 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 123 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 124 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 125 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 126 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 127 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 128 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 129 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 130 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 131 (*B)->factortype = ftype; 132 PetscFunctionReturn(0); 133 } 134 EXTERN_C_END 135 136 #undef __FUNCT__ 137 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 138 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 139 { 140 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 141 IS isicol; 142 PetscErrorCode ierr; 143 const PetscInt *r,*ic; 144 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 145 PetscInt *bi,*bj,*ajtmp; 146 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 147 PetscReal f; 148 PetscInt nlnk,*lnk,k,**bi_ptr; 149 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 150 PetscBT lnkbt; 151 152 PetscFunctionBegin; 153 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 154 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 155 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 156 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 157 158 /* get new row pointers */ 159 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 160 bi[0] = 0; 161 162 /* bdiag is location of diagonal in factor */ 163 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 164 bdiag[0] = 0; 165 166 /* linked list for storing column indices of the active row */ 167 nlnk = n + 1; 168 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 169 170 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 171 172 /* initial FreeSpace size is f*(ai[n]+1) */ 173 f = info->fill; 174 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 175 current_space = free_space; 176 177 for (i=0; i<n; i++) { 178 /* copy previous fill into linked list */ 179 nzi = 0; 180 nnz = ai[r[i]+1] - ai[r[i]]; 181 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 182 ajtmp = aj + ai[r[i]]; 183 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 184 nzi += nlnk; 185 186 /* add pivot rows into linked list */ 187 row = lnk[n]; 188 while (row < i) { 189 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 190 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 191 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 192 nzi += nlnk; 193 row = lnk[row]; 194 } 195 bi[i+1] = bi[i] + nzi; 196 im[i] = nzi; 197 198 /* mark bdiag */ 199 nzbd = 0; 200 nnz = nzi; 201 k = lnk[n]; 202 while (nnz-- && k < i){ 203 nzbd++; 204 k = lnk[k]; 205 } 206 bdiag[i] = bi[i] + nzbd; 207 208 /* if free space is not available, make more free space */ 209 if (current_space->local_remaining<nzi) { 210 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 211 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 212 reallocs++; 213 } 214 215 /* copy data into free space, then initialize lnk */ 216 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 217 bi_ptr[i] = current_space->array; 218 current_space->array += nzi; 219 current_space->local_used += nzi; 220 current_space->local_remaining -= nzi; 221 } 222 #if defined(PETSC_USE_INFO) 223 if (ai[n] != 0) { 224 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 225 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 226 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 227 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 228 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 229 } else { 230 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 231 } 232 #endif 233 234 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 235 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 236 237 /* destroy list of free space and other temporary array(s) */ 238 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 239 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 240 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 241 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 242 243 /* put together the new matrix */ 244 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 245 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 246 b = (Mat_SeqAIJ*)(B)->data; 247 b->free_a = PETSC_TRUE; 248 b->free_ij = PETSC_TRUE; 249 b->singlemalloc = PETSC_FALSE; 250 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 251 b->j = bj; 252 b->i = bi; 253 b->diag = bdiag; 254 b->ilen = 0; 255 b->imax = 0; 256 b->row = isrow; 257 b->col = iscol; 258 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 259 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 260 b->icol = isicol; 261 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 262 263 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 264 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 265 b->maxnz = b->nz = bi[n] ; 266 267 (B)->factortype = MAT_FACTOR_LU; 268 (B)->info.factor_mallocs = reallocs; 269 (B)->info.fill_ratio_given = f; 270 271 if (ai[n]) { 272 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 273 } else { 274 (B)->info.fill_ratio_needed = 0.0; 275 } 276 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 277 if (a->inode.size) { 278 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 279 } 280 PetscFunctionReturn(0); 281 } 282 283 #undef __FUNCT__ 284 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 285 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 286 { 287 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 288 IS isicol; 289 PetscErrorCode ierr; 290 const PetscInt *r,*ic; 291 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 292 PetscInt *bi,*bj,*ajtmp; 293 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 294 PetscReal f; 295 PetscInt nlnk,*lnk,k,**bi_ptr; 296 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 297 PetscBT lnkbt; 298 299 PetscFunctionBegin; 300 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 301 /* 302 PetscBool olddatastruct=PETSC_FALSE; 303 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 304 if(olddatastruct){ 305 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 306 PetscFunctionReturn(0); 307 } 308 */ 309 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 310 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 311 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 312 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 313 314 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 315 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 316 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 317 bi[0] = bdiag[0] = 0; 318 319 /* linked list for storing column indices of the active row */ 320 nlnk = n + 1; 321 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 322 323 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 324 325 /* initial FreeSpace size is f*(ai[n]+1) */ 326 f = info->fill; 327 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 328 current_space = free_space; 329 330 for (i=0; i<n; i++) { 331 /* copy previous fill into linked list */ 332 nzi = 0; 333 nnz = ai[r[i]+1] - ai[r[i]]; 334 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 335 ajtmp = aj + ai[r[i]]; 336 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 337 nzi += nlnk; 338 339 /* add pivot rows into linked list */ 340 row = lnk[n]; 341 while (row < i){ 342 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 343 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 344 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 345 nzi += nlnk; 346 row = lnk[row]; 347 } 348 bi[i+1] = bi[i] + nzi; 349 im[i] = nzi; 350 351 /* mark bdiag */ 352 nzbd = 0; 353 nnz = nzi; 354 k = lnk[n]; 355 while (nnz-- && k < i){ 356 nzbd++; 357 k = lnk[k]; 358 } 359 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 360 361 /* if free space is not available, make more free space */ 362 if (current_space->local_remaining<nzi) { 363 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 364 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 365 reallocs++; 366 } 367 368 /* copy data into free space, then initialize lnk */ 369 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 370 bi_ptr[i] = current_space->array; 371 current_space->array += nzi; 372 current_space->local_used += nzi; 373 current_space->local_remaining -= nzi; 374 } 375 376 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 377 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 378 379 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 380 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 381 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 382 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 383 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 384 385 /* put together the new matrix */ 386 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 387 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 388 b = (Mat_SeqAIJ*)(B)->data; 389 b->free_a = PETSC_TRUE; 390 b->free_ij = PETSC_TRUE; 391 b->singlemalloc = PETSC_FALSE; 392 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 393 b->j = bj; 394 b->i = bi; 395 b->diag = bdiag; 396 b->ilen = 0; 397 b->imax = 0; 398 b->row = isrow; 399 b->col = iscol; 400 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 401 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 402 b->icol = isicol; 403 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 404 405 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 406 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 407 b->maxnz = b->nz = bdiag[0]+1; 408 B->factortype = MAT_FACTOR_LU; 409 B->info.factor_mallocs = reallocs; 410 B->info.fill_ratio_given = f; 411 412 if (ai[n]) { 413 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 414 } else { 415 B->info.fill_ratio_needed = 0.0; 416 } 417 #if defined(PETSC_USE_INFO) 418 if (ai[n] != 0) { 419 PetscReal af = B->info.fill_ratio_needed; 420 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 421 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 422 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 423 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 424 } else { 425 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 426 } 427 #endif 428 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 429 if (a->inode.size) { 430 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 431 } 432 PetscFunctionReturn(0); 433 } 434 435 /* 436 Trouble in factorization, should we dump the original matrix? 437 */ 438 #undef __FUNCT__ 439 #define __FUNCT__ "MatFactorDumpMatrix" 440 PetscErrorCode MatFactorDumpMatrix(Mat A) 441 { 442 PetscErrorCode ierr; 443 PetscBool flg = PETSC_FALSE; 444 445 PetscFunctionBegin; 446 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 447 if (flg) { 448 PetscViewer viewer; 449 char filename[PETSC_MAX_PATH_LEN]; 450 451 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 452 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 453 ierr = MatView(A,viewer);CHKERRQ(ierr); 454 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 455 } 456 PetscFunctionReturn(0); 457 } 458 459 #undef __FUNCT__ 460 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 461 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 462 { 463 Mat C=B; 464 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 465 IS isrow = b->row,isicol = b->icol; 466 PetscErrorCode ierr; 467 const PetscInt *r,*ic,*ics; 468 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 469 PetscInt i,j,k,nz,nzL,row,*pj; 470 const PetscInt *ajtmp,*bjtmp; 471 MatScalar *rtmp,*pc,multiplier,*pv; 472 const MatScalar *aa=a->a,*v; 473 PetscBool row_identity,col_identity; 474 FactorShiftCtx sctx; 475 const PetscInt *ddiag; 476 PetscReal rs; 477 MatScalar d; 478 479 PetscFunctionBegin; 480 /* MatPivotSetUp(): initialize shift context sctx */ 481 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 482 483 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 484 ddiag = a->diag; 485 sctx.shift_top = info->zeropivot; 486 for (i=0; i<n; i++) { 487 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 488 d = (aa)[ddiag[i]]; 489 rs = -PetscAbsScalar(d) - PetscRealPart(d); 490 v = aa+ai[i]; 491 nz = ai[i+1] - ai[i]; 492 for (j=0; j<nz; j++) 493 rs += PetscAbsScalar(v[j]); 494 if (rs>sctx.shift_top) sctx.shift_top = rs; 495 } 496 sctx.shift_top *= 1.1; 497 sctx.nshift_max = 5; 498 sctx.shift_lo = 0.; 499 sctx.shift_hi = 1.; 500 } 501 502 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 503 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 504 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 505 ics = ic; 506 507 do { 508 sctx.newshift = PETSC_FALSE; 509 for (i=0; i<n; i++){ 510 /* zero rtmp */ 511 /* L part */ 512 nz = bi[i+1] - bi[i]; 513 bjtmp = bj + bi[i]; 514 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 515 516 /* U part */ 517 nz = bdiag[i]-bdiag[i+1]; 518 bjtmp = bj + bdiag[i+1]+1; 519 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 520 521 /* load in initial (unfactored row) */ 522 nz = ai[r[i]+1] - ai[r[i]]; 523 ajtmp = aj + ai[r[i]]; 524 v = aa + ai[r[i]]; 525 for (j=0; j<nz; j++) { 526 rtmp[ics[ajtmp[j]]] = v[j]; 527 } 528 /* ZeropivotApply() */ 529 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 530 531 /* elimination */ 532 bjtmp = bj + bi[i]; 533 row = *bjtmp++; 534 nzL = bi[i+1] - bi[i]; 535 for(k=0; k < nzL;k++) { 536 pc = rtmp + row; 537 if (*pc != 0.0) { 538 pv = b->a + bdiag[row]; 539 multiplier = *pc * (*pv); 540 *pc = multiplier; 541 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 542 pv = b->a + bdiag[row+1]+1; 543 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 544 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 545 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 546 } 547 row = *bjtmp++; 548 } 549 550 /* finished row so stick it into b->a */ 551 rs = 0.0; 552 /* L part */ 553 pv = b->a + bi[i] ; 554 pj = b->j + bi[i] ; 555 nz = bi[i+1] - bi[i]; 556 for (j=0; j<nz; j++) { 557 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 558 } 559 560 /* U part */ 561 pv = b->a + bdiag[i+1]+1; 562 pj = b->j + bdiag[i+1]+1; 563 nz = bdiag[i] - bdiag[i+1]-1; 564 for (j=0; j<nz; j++) { 565 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 566 } 567 568 sctx.rs = rs; 569 sctx.pv = rtmp[i]; 570 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 571 if(sctx.newshift) break; /* break for-loop */ 572 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 573 574 /* Mark diagonal and invert diagonal for simplier triangular solves */ 575 pv = b->a + bdiag[i]; 576 *pv = 1.0/rtmp[i]; 577 578 } /* endof for (i=0; i<n; i++){ */ 579 580 /* MatPivotRefine() */ 581 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 582 /* 583 * if no shift in this attempt & shifting & started shifting & can refine, 584 * then try lower shift 585 */ 586 sctx.shift_hi = sctx.shift_fraction; 587 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 588 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 589 sctx.newshift = PETSC_TRUE; 590 sctx.nshift++; 591 } 592 } while (sctx.newshift); 593 594 ierr = PetscFree(rtmp);CHKERRQ(ierr); 595 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 596 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 597 598 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 599 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 600 if (row_identity && col_identity) { 601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 602 } else { 603 C->ops->solve = MatSolve_SeqAIJ; 604 } 605 C->ops->solveadd = MatSolveAdd_SeqAIJ; 606 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 607 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 608 C->ops->matsolve = MatMatSolve_SeqAIJ; 609 C->assembled = PETSC_TRUE; 610 C->preallocated = PETSC_TRUE; 611 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 612 613 /* MatShiftView(A,info,&sctx) */ 614 if (sctx.nshift){ 615 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 616 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 617 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 618 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 619 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 620 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 621 } 622 } 623 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 #undef __FUNCT__ 628 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 629 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 630 { 631 Mat C=B; 632 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 633 IS isrow = b->row,isicol = b->icol; 634 PetscErrorCode ierr; 635 const PetscInt *r,*ic,*ics; 636 PetscInt nz,row,i,j,n=A->rmap->n,diag; 637 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 638 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 639 MatScalar *pv,*rtmp,*pc,multiplier,d; 640 const MatScalar *v,*aa=a->a; 641 PetscReal rs=0.0; 642 FactorShiftCtx sctx; 643 const PetscInt *ddiag; 644 PetscBool row_identity, col_identity; 645 646 PetscFunctionBegin; 647 /* MatPivotSetUp(): initialize shift context sctx */ 648 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 649 650 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 651 ddiag = a->diag; 652 sctx.shift_top = info->zeropivot; 653 for (i=0; i<n; i++) { 654 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 655 d = (aa)[ddiag[i]]; 656 rs = -PetscAbsScalar(d) - PetscRealPart(d); 657 v = aa+ai[i]; 658 nz = ai[i+1] - ai[i]; 659 for (j=0; j<nz; j++) 660 rs += PetscAbsScalar(v[j]); 661 if (rs>sctx.shift_top) sctx.shift_top = rs; 662 } 663 sctx.shift_top *= 1.1; 664 sctx.nshift_max = 5; 665 sctx.shift_lo = 0.; 666 sctx.shift_hi = 1.; 667 } 668 669 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 670 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 671 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 672 ics = ic; 673 674 do { 675 sctx.newshift = PETSC_FALSE; 676 for (i=0; i<n; i++){ 677 nz = bi[i+1] - bi[i]; 678 bjtmp = bj + bi[i]; 679 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 680 681 /* load in initial (unfactored row) */ 682 nz = ai[r[i]+1] - ai[r[i]]; 683 ajtmp = aj + ai[r[i]]; 684 v = aa + ai[r[i]]; 685 for (j=0; j<nz; j++) { 686 rtmp[ics[ajtmp[j]]] = v[j]; 687 } 688 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 689 690 row = *bjtmp++; 691 while (row < i) { 692 pc = rtmp + row; 693 if (*pc != 0.0) { 694 pv = b->a + diag_offset[row]; 695 pj = b->j + diag_offset[row] + 1; 696 multiplier = *pc / *pv++; 697 *pc = multiplier; 698 nz = bi[row+1] - diag_offset[row] - 1; 699 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 700 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 701 } 702 row = *bjtmp++; 703 } 704 /* finished row so stick it into b->a */ 705 pv = b->a + bi[i] ; 706 pj = b->j + bi[i] ; 707 nz = bi[i+1] - bi[i]; 708 diag = diag_offset[i] - bi[i]; 709 rs = 0.0; 710 for (j=0; j<nz; j++) { 711 pv[j] = rtmp[pj[j]]; 712 rs += PetscAbsScalar(pv[j]); 713 } 714 rs -= PetscAbsScalar(pv[diag]); 715 716 sctx.rs = rs; 717 sctx.pv = pv[diag]; 718 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 719 if (sctx.newshift) break; 720 pv[diag] = sctx.pv; 721 } 722 723 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 724 /* 725 * if no shift in this attempt & shifting & started shifting & can refine, 726 * then try lower shift 727 */ 728 sctx.shift_hi = sctx.shift_fraction; 729 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 730 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 731 sctx.newshift = PETSC_TRUE; 732 sctx.nshift++; 733 } 734 } while (sctx.newshift); 735 736 /* invert diagonal entries for simplier triangular solves */ 737 for (i=0; i<n; i++) { 738 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 739 } 740 ierr = PetscFree(rtmp);CHKERRQ(ierr); 741 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 742 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 743 744 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 745 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 746 if (row_identity && col_identity) { 747 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 748 } else { 749 C->ops->solve = MatSolve_SeqAIJ_inplace; 750 } 751 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 752 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 753 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 754 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 755 C->assembled = PETSC_TRUE; 756 C->preallocated = PETSC_TRUE; 757 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 758 if (sctx.nshift){ 759 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 760 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 761 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 762 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 763 } 764 } 765 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 766 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 767 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 768 PetscFunctionReturn(0); 769 } 770 771 /* 772 This routine implements inplace ILU(0) with row or/and column permutations. 773 Input: 774 A - original matrix 775 Output; 776 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 777 a->j (col index) is permuted by the inverse of colperm, then sorted 778 a->a reordered accordingly with a->j 779 a->diag (ptr to diagonal elements) is updated. 780 */ 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 783 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 784 { 785 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 786 IS isrow = a->row,isicol = a->icol; 787 PetscErrorCode ierr; 788 const PetscInt *r,*ic,*ics; 789 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 790 PetscInt *ajtmp,nz,row; 791 PetscInt *diag = a->diag,nbdiag,*pj; 792 PetscScalar *rtmp,*pc,multiplier,d; 793 MatScalar *pv,*v; 794 PetscReal rs; 795 FactorShiftCtx sctx; 796 const MatScalar *aa=a->a,*vtmp; 797 798 PetscFunctionBegin; 799 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 800 801 /* MatPivotSetUp(): initialize shift context sctx */ 802 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 803 804 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 805 const PetscInt *ddiag = a->diag; 806 sctx.shift_top = info->zeropivot; 807 for (i=0; i<n; i++) { 808 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 809 d = (aa)[ddiag[i]]; 810 rs = -PetscAbsScalar(d) - PetscRealPart(d); 811 vtmp = aa+ai[i]; 812 nz = ai[i+1] - ai[i]; 813 for (j=0; j<nz; j++) 814 rs += PetscAbsScalar(vtmp[j]); 815 if (rs>sctx.shift_top) sctx.shift_top = rs; 816 } 817 sctx.shift_top *= 1.1; 818 sctx.nshift_max = 5; 819 sctx.shift_lo = 0.; 820 sctx.shift_hi = 1.; 821 } 822 823 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 824 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 825 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 826 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 827 ics = ic; 828 829 #if defined(MV) 830 sctx.shift_top = 0.; 831 sctx.nshift_max = 0; 832 sctx.shift_lo = 0.; 833 sctx.shift_hi = 0.; 834 sctx.shift_fraction = 0.; 835 836 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 837 sctx.shift_top = 0.; 838 for (i=0; i<n; i++) { 839 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 840 d = (a->a)[diag[i]]; 841 rs = -PetscAbsScalar(d) - PetscRealPart(d); 842 v = a->a+ai[i]; 843 nz = ai[i+1] - ai[i]; 844 for (j=0; j<nz; j++) 845 rs += PetscAbsScalar(v[j]); 846 if (rs>sctx.shift_top) sctx.shift_top = rs; 847 } 848 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 849 sctx.shift_top *= 1.1; 850 sctx.nshift_max = 5; 851 sctx.shift_lo = 0.; 852 sctx.shift_hi = 1.; 853 } 854 855 sctx.shift_amount = 0.; 856 sctx.nshift = 0; 857 #endif 858 859 do { 860 sctx.newshift = PETSC_FALSE; 861 for (i=0; i<n; i++){ 862 /* load in initial unfactored row */ 863 nz = ai[r[i]+1] - ai[r[i]]; 864 ajtmp = aj + ai[r[i]]; 865 v = a->a + ai[r[i]]; 866 /* sort permuted ajtmp and values v accordingly */ 867 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 868 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 869 870 diag[r[i]] = ai[r[i]]; 871 for (j=0; j<nz; j++) { 872 rtmp[ajtmp[j]] = v[j]; 873 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 874 } 875 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 876 877 row = *ajtmp++; 878 while (row < i) { 879 pc = rtmp + row; 880 if (*pc != 0.0) { 881 pv = a->a + diag[r[row]]; 882 pj = aj + diag[r[row]] + 1; 883 884 multiplier = *pc / *pv++; 885 *pc = multiplier; 886 nz = ai[r[row]+1] - diag[r[row]] - 1; 887 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 888 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 889 } 890 row = *ajtmp++; 891 } 892 /* finished row so overwrite it onto a->a */ 893 pv = a->a + ai[r[i]] ; 894 pj = aj + ai[r[i]] ; 895 nz = ai[r[i]+1] - ai[r[i]]; 896 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 897 898 rs = 0.0; 899 for (j=0; j<nz; j++) { 900 pv[j] = rtmp[pj[j]]; 901 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 902 } 903 904 sctx.rs = rs; 905 sctx.pv = pv[nbdiag]; 906 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 907 if (sctx.newshift) break; 908 pv[nbdiag] = sctx.pv; 909 } 910 911 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 912 /* 913 * if no shift in this attempt & shifting & started shifting & can refine, 914 * then try lower shift 915 */ 916 sctx.shift_hi = sctx.shift_fraction; 917 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 918 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 919 sctx.newshift = PETSC_TRUE; 920 sctx.nshift++; 921 } 922 } while (sctx.newshift); 923 924 /* invert diagonal entries for simplier triangular solves */ 925 for (i=0; i<n; i++) { 926 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 927 } 928 929 ierr = PetscFree(rtmp);CHKERRQ(ierr); 930 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 931 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 932 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 933 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 934 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 935 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 936 A->assembled = PETSC_TRUE; 937 A->preallocated = PETSC_TRUE; 938 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 939 if (sctx.nshift){ 940 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 941 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 942 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 943 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 944 } 945 } 946 PetscFunctionReturn(0); 947 } 948 949 /* ----------------------------------------------------------- */ 950 #undef __FUNCT__ 951 #define __FUNCT__ "MatLUFactor_SeqAIJ" 952 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 953 { 954 PetscErrorCode ierr; 955 Mat C; 956 957 PetscFunctionBegin; 958 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 959 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 960 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 961 A->ops->solve = C->ops->solve; 962 A->ops->solvetranspose = C->ops->solvetranspose; 963 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 964 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 /* ----------------------------------------------------------- */ 968 969 970 #undef __FUNCT__ 971 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 972 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 973 { 974 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 975 IS iscol = a->col,isrow = a->row; 976 PetscErrorCode ierr; 977 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 978 PetscInt nz; 979 const PetscInt *rout,*cout,*r,*c; 980 PetscScalar *x,*tmp,*tmps,sum; 981 const PetscScalar *b; 982 const MatScalar *aa = a->a,*v; 983 984 PetscFunctionBegin; 985 if (!n) PetscFunctionReturn(0); 986 987 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 988 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 989 tmp = a->solve_work; 990 991 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 992 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 993 994 /* forward solve the lower triangular */ 995 tmp[0] = b[*r++]; 996 tmps = tmp; 997 for (i=1; i<n; i++) { 998 v = aa + ai[i] ; 999 vi = aj + ai[i] ; 1000 nz = a->diag[i] - ai[i]; 1001 sum = b[*r++]; 1002 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1003 tmp[i] = sum; 1004 } 1005 1006 /* backward solve the upper triangular */ 1007 for (i=n-1; i>=0; i--){ 1008 v = aa + a->diag[i] + 1; 1009 vi = aj + a->diag[i] + 1; 1010 nz = ai[i+1] - a->diag[i] - 1; 1011 sum = tmp[i]; 1012 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1013 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1014 } 1015 1016 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1017 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1018 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1019 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1020 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1021 PetscFunctionReturn(0); 1022 } 1023 1024 #undef __FUNCT__ 1025 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1026 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1027 { 1028 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1029 IS iscol = a->col,isrow = a->row; 1030 PetscErrorCode ierr; 1031 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1032 PetscInt nz,neq; 1033 const PetscInt *rout,*cout,*r,*c; 1034 PetscScalar *x,*b,*tmp,*tmps,sum; 1035 const MatScalar *aa = a->a,*v; 1036 PetscBool bisdense,xisdense; 1037 1038 PetscFunctionBegin; 1039 if (!n) PetscFunctionReturn(0); 1040 1041 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1042 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1043 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1044 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1045 1046 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1047 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1048 1049 tmp = a->solve_work; 1050 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1051 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1052 1053 for (neq=0; neq<B->cmap->n; neq++){ 1054 /* forward solve the lower triangular */ 1055 tmp[0] = b[r[0]]; 1056 tmps = tmp; 1057 for (i=1; i<n; i++) { 1058 v = aa + ai[i] ; 1059 vi = aj + ai[i] ; 1060 nz = a->diag[i] - ai[i]; 1061 sum = b[r[i]]; 1062 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1063 tmp[i] = sum; 1064 } 1065 /* backward solve the upper triangular */ 1066 for (i=n-1; i>=0; i--){ 1067 v = aa + a->diag[i] + 1; 1068 vi = aj + a->diag[i] + 1; 1069 nz = ai[i+1] - a->diag[i] - 1; 1070 sum = tmp[i]; 1071 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1072 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1073 } 1074 1075 b += n; 1076 x += n; 1077 } 1078 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1079 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1080 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1081 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1082 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1083 PetscFunctionReturn(0); 1084 } 1085 1086 #undef __FUNCT__ 1087 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1088 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1089 { 1090 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1091 IS iscol = a->col,isrow = a->row; 1092 PetscErrorCode ierr; 1093 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1094 PetscInt nz,neq; 1095 const PetscInt *rout,*cout,*r,*c; 1096 PetscScalar *x,*b,*tmp,sum; 1097 const MatScalar *aa = a->a,*v; 1098 PetscBool bisdense,xisdense; 1099 1100 PetscFunctionBegin; 1101 if (!n) PetscFunctionReturn(0); 1102 1103 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1104 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1105 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1106 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1107 1108 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1109 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1110 1111 tmp = a->solve_work; 1112 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1113 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1114 1115 for (neq=0; neq<B->cmap->n; neq++){ 1116 /* forward solve the lower triangular */ 1117 tmp[0] = b[r[0]]; 1118 v = aa; 1119 vi = aj; 1120 for (i=1; i<n; i++) { 1121 nz = ai[i+1] - ai[i]; 1122 sum = b[r[i]]; 1123 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1124 tmp[i] = sum; 1125 v += nz; vi += nz; 1126 } 1127 1128 /* backward solve the upper triangular */ 1129 for (i=n-1; i>=0; i--){ 1130 v = aa + adiag[i+1]+1; 1131 vi = aj + adiag[i+1]+1; 1132 nz = adiag[i]-adiag[i+1]-1; 1133 sum = tmp[i]; 1134 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1135 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1136 } 1137 1138 b += n; 1139 x += n; 1140 } 1141 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1142 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1143 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1144 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1145 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 #undef __FUNCT__ 1150 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1151 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1152 { 1153 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1154 IS iscol = a->col,isrow = a->row; 1155 PetscErrorCode ierr; 1156 const PetscInt *r,*c,*rout,*cout; 1157 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1158 PetscInt nz,row; 1159 PetscScalar *x,*b,*tmp,*tmps,sum; 1160 const MatScalar *aa = a->a,*v; 1161 1162 PetscFunctionBegin; 1163 if (!n) PetscFunctionReturn(0); 1164 1165 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1166 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1167 tmp = a->solve_work; 1168 1169 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1170 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1171 1172 /* forward solve the lower triangular */ 1173 tmp[0] = b[*r++]; 1174 tmps = tmp; 1175 for (row=1; row<n; row++) { 1176 i = rout[row]; /* permuted row */ 1177 v = aa + ai[i] ; 1178 vi = aj + ai[i] ; 1179 nz = a->diag[i] - ai[i]; 1180 sum = b[*r++]; 1181 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1182 tmp[row] = sum; 1183 } 1184 1185 /* backward solve the upper triangular */ 1186 for (row=n-1; row>=0; row--){ 1187 i = rout[row]; /* permuted row */ 1188 v = aa + a->diag[i] + 1; 1189 vi = aj + a->diag[i] + 1; 1190 nz = ai[i+1] - a->diag[i] - 1; 1191 sum = tmp[row]; 1192 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1193 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1194 } 1195 1196 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1197 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1198 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1199 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1200 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 /* ----------------------------------------------------------- */ 1205 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1206 #undef __FUNCT__ 1207 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1208 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1209 { 1210 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1211 PetscErrorCode ierr; 1212 PetscInt n = A->rmap->n; 1213 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1214 PetscScalar *x; 1215 const PetscScalar *b; 1216 const MatScalar *aa = a->a; 1217 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1218 PetscInt adiag_i,i,nz,ai_i; 1219 const PetscInt *vi; 1220 const MatScalar *v; 1221 PetscScalar sum; 1222 #endif 1223 1224 PetscFunctionBegin; 1225 if (!n) PetscFunctionReturn(0); 1226 1227 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1228 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1229 1230 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1231 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1232 #else 1233 /* forward solve the lower triangular */ 1234 x[0] = b[0]; 1235 for (i=1; i<n; i++) { 1236 ai_i = ai[i]; 1237 v = aa + ai_i; 1238 vi = aj + ai_i; 1239 nz = adiag[i] - ai_i; 1240 sum = b[i]; 1241 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1242 x[i] = sum; 1243 } 1244 1245 /* backward solve the upper triangular */ 1246 for (i=n-1; i>=0; i--){ 1247 adiag_i = adiag[i]; 1248 v = aa + adiag_i + 1; 1249 vi = aj + adiag_i + 1; 1250 nz = ai[i+1] - adiag_i - 1; 1251 sum = x[i]; 1252 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1253 x[i] = sum*aa[adiag_i]; 1254 } 1255 #endif 1256 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1257 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1258 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1264 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1265 { 1266 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1267 IS iscol = a->col,isrow = a->row; 1268 PetscErrorCode ierr; 1269 PetscInt i, n = A->rmap->n,j; 1270 PetscInt nz; 1271 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1272 PetscScalar *x,*tmp,sum; 1273 const PetscScalar *b; 1274 const MatScalar *aa = a->a,*v; 1275 1276 PetscFunctionBegin; 1277 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1278 1279 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1280 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1281 tmp = a->solve_work; 1282 1283 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1284 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1285 1286 /* forward solve the lower triangular */ 1287 tmp[0] = b[*r++]; 1288 for (i=1; i<n; i++) { 1289 v = aa + ai[i] ; 1290 vi = aj + ai[i] ; 1291 nz = a->diag[i] - ai[i]; 1292 sum = b[*r++]; 1293 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1294 tmp[i] = sum; 1295 } 1296 1297 /* backward solve the upper triangular */ 1298 for (i=n-1; i>=0; i--){ 1299 v = aa + a->diag[i] + 1; 1300 vi = aj + a->diag[i] + 1; 1301 nz = ai[i+1] - a->diag[i] - 1; 1302 sum = tmp[i]; 1303 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1304 tmp[i] = sum*aa[a->diag[i]]; 1305 x[*c--] += tmp[i]; 1306 } 1307 1308 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1309 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1310 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1311 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1312 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1313 1314 PetscFunctionReturn(0); 1315 } 1316 1317 #undef __FUNCT__ 1318 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1319 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1320 { 1321 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1322 IS iscol = a->col,isrow = a->row; 1323 PetscErrorCode ierr; 1324 PetscInt i, n = A->rmap->n,j; 1325 PetscInt nz; 1326 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1327 PetscScalar *x,*tmp,sum; 1328 const PetscScalar *b; 1329 const MatScalar *aa = a->a,*v; 1330 1331 PetscFunctionBegin; 1332 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1333 1334 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1335 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1336 tmp = a->solve_work; 1337 1338 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1339 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1340 1341 /* forward solve the lower triangular */ 1342 tmp[0] = b[r[0]]; 1343 v = aa; 1344 vi = aj; 1345 for (i=1; i<n; i++) { 1346 nz = ai[i+1] - ai[i]; 1347 sum = b[r[i]]; 1348 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1349 tmp[i] = sum; 1350 v += nz; vi += nz; 1351 } 1352 1353 /* backward solve the upper triangular */ 1354 v = aa + adiag[n-1]; 1355 vi = aj + adiag[n-1]; 1356 for (i=n-1; i>=0; i--){ 1357 nz = adiag[i] - adiag[i+1] - 1; 1358 sum = tmp[i]; 1359 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1360 tmp[i] = sum*v[nz]; 1361 x[c[i]] += tmp[i]; 1362 v += nz+1; vi += nz+1; 1363 } 1364 1365 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1366 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1367 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1368 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1369 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1370 1371 PetscFunctionReturn(0); 1372 } 1373 1374 #undef __FUNCT__ 1375 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1376 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1377 { 1378 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1379 IS iscol = a->col,isrow = a->row; 1380 PetscErrorCode ierr; 1381 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1382 PetscInt i,n = A->rmap->n,j; 1383 PetscInt nz; 1384 PetscScalar *x,*tmp,s1; 1385 const MatScalar *aa = a->a,*v; 1386 const PetscScalar *b; 1387 1388 PetscFunctionBegin; 1389 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1390 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1391 tmp = a->solve_work; 1392 1393 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1394 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1395 1396 /* copy the b into temp work space according to permutation */ 1397 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1398 1399 /* forward solve the U^T */ 1400 for (i=0; i<n; i++) { 1401 v = aa + diag[i] ; 1402 vi = aj + diag[i] + 1; 1403 nz = ai[i+1] - diag[i] - 1; 1404 s1 = tmp[i]; 1405 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1406 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1407 tmp[i] = s1; 1408 } 1409 1410 /* backward solve the L^T */ 1411 for (i=n-1; i>=0; i--){ 1412 v = aa + diag[i] - 1 ; 1413 vi = aj + diag[i] - 1 ; 1414 nz = diag[i] - ai[i]; 1415 s1 = tmp[i]; 1416 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1417 } 1418 1419 /* copy tmp into x according to permutation */ 1420 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1421 1422 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1423 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1424 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1425 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1426 1427 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1428 PetscFunctionReturn(0); 1429 } 1430 1431 #undef __FUNCT__ 1432 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1433 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1434 { 1435 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1436 IS iscol = a->col,isrow = a->row; 1437 PetscErrorCode ierr; 1438 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1439 PetscInt i,n = A->rmap->n,j; 1440 PetscInt nz; 1441 PetscScalar *x,*tmp,s1; 1442 const MatScalar *aa = a->a,*v; 1443 const PetscScalar *b; 1444 1445 PetscFunctionBegin; 1446 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1447 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1448 tmp = a->solve_work; 1449 1450 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1451 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1452 1453 /* copy the b into temp work space according to permutation */ 1454 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1455 1456 /* forward solve the U^T */ 1457 for (i=0; i<n; i++) { 1458 v = aa + adiag[i+1] + 1; 1459 vi = aj + adiag[i+1] + 1; 1460 nz = adiag[i] - adiag[i+1] - 1; 1461 s1 = tmp[i]; 1462 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1463 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1464 tmp[i] = s1; 1465 } 1466 1467 /* backward solve the L^T */ 1468 for (i=n-1; i>=0; i--){ 1469 v = aa + ai[i]; 1470 vi = aj + ai[i]; 1471 nz = ai[i+1] - ai[i]; 1472 s1 = tmp[i]; 1473 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1474 } 1475 1476 /* copy tmp into x according to permutation */ 1477 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1478 1479 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1480 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1481 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1482 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1483 1484 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1485 PetscFunctionReturn(0); 1486 } 1487 1488 #undef __FUNCT__ 1489 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1490 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1491 { 1492 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1493 IS iscol = a->col,isrow = a->row; 1494 PetscErrorCode ierr; 1495 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1496 PetscInt i,n = A->rmap->n,j; 1497 PetscInt nz; 1498 PetscScalar *x,*tmp,s1; 1499 const MatScalar *aa = a->a,*v; 1500 const PetscScalar *b; 1501 1502 PetscFunctionBegin; 1503 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1504 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1505 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1506 tmp = a->solve_work; 1507 1508 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1509 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1510 1511 /* copy the b into temp work space according to permutation */ 1512 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1513 1514 /* forward solve the U^T */ 1515 for (i=0; i<n; i++) { 1516 v = aa + diag[i] ; 1517 vi = aj + diag[i] + 1; 1518 nz = ai[i+1] - diag[i] - 1; 1519 s1 = tmp[i]; 1520 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1521 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1522 tmp[i] = s1; 1523 } 1524 1525 /* backward solve the L^T */ 1526 for (i=n-1; i>=0; i--){ 1527 v = aa + diag[i] - 1 ; 1528 vi = aj + diag[i] - 1 ; 1529 nz = diag[i] - ai[i]; 1530 s1 = tmp[i]; 1531 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1532 } 1533 1534 /* copy tmp into x according to permutation */ 1535 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1536 1537 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1538 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1539 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1540 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1541 1542 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1543 PetscFunctionReturn(0); 1544 } 1545 1546 #undef __FUNCT__ 1547 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1548 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1549 { 1550 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1551 IS iscol = a->col,isrow = a->row; 1552 PetscErrorCode ierr; 1553 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1554 PetscInt i,n = A->rmap->n,j; 1555 PetscInt nz; 1556 PetscScalar *x,*tmp,s1; 1557 const MatScalar *aa = a->a,*v; 1558 const PetscScalar *b; 1559 1560 PetscFunctionBegin; 1561 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1562 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1563 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1564 tmp = a->solve_work; 1565 1566 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1567 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1568 1569 /* copy the b into temp work space according to permutation */ 1570 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1571 1572 /* forward solve the U^T */ 1573 for (i=0; i<n; i++) { 1574 v = aa + adiag[i+1] + 1; 1575 vi = aj + adiag[i+1] + 1; 1576 nz = adiag[i] - adiag[i+1] - 1; 1577 s1 = tmp[i]; 1578 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1579 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1580 tmp[i] = s1; 1581 } 1582 1583 1584 /* backward solve the L^T */ 1585 for (i=n-1; i>=0; i--){ 1586 v = aa + ai[i] ; 1587 vi = aj + ai[i]; 1588 nz = ai[i+1] - ai[i]; 1589 s1 = tmp[i]; 1590 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1591 } 1592 1593 /* copy tmp into x according to permutation */ 1594 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1595 1596 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1597 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1598 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1599 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1600 1601 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 /* ----------------------------------------------------------------*/ 1606 1607 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1608 1609 /* 1610 ilu() under revised new data structure. 1611 Factored arrays bj and ba are stored as 1612 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1613 1614 bi=fact->i is an array of size n+1, in which 1615 bi+ 1616 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1617 bi[n]: points to L(n-1,n-1)+1 1618 1619 bdiag=fact->diag is an array of size n+1,in which 1620 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1621 bdiag[n]: points to entry of U(n-1,0)-1 1622 1623 U(i,:) contains bdiag[i] as its last entry, i.e., 1624 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1625 */ 1626 #undef __FUNCT__ 1627 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1628 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1629 { 1630 1631 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1632 PetscErrorCode ierr; 1633 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1634 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1635 PetscBool missing; 1636 IS isicol; 1637 1638 PetscFunctionBegin; 1639 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1640 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1641 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1642 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1643 1644 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1645 b = (Mat_SeqAIJ*)(fact)->data; 1646 1647 /* allocate matrix arrays for new data structure */ 1648 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1649 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650 b->singlemalloc = PETSC_TRUE; 1651 if (!b->diag){ 1652 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1653 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1654 } 1655 bdiag = b->diag; 1656 1657 if (n > 0) { 1658 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1659 } 1660 1661 /* set bi and bj with new data structure */ 1662 bi = b->i; 1663 bj = b->j; 1664 1665 /* L part */ 1666 bi[0] = 0; 1667 for (i=0; i<n; i++){ 1668 nz = adiag[i] - ai[i]; 1669 bi[i+1] = bi[i] + nz; 1670 aj = a->j + ai[i]; 1671 for (j=0; j<nz; j++){ 1672 /* *bj = aj[j]; bj++; */ 1673 bj[k++] = aj[j]; 1674 } 1675 } 1676 1677 /* U part */ 1678 bdiag[n] = bi[n]-1; 1679 for (i=n-1; i>=0; i--){ 1680 nz = ai[i+1] - adiag[i] - 1; 1681 aj = a->j + adiag[i] + 1; 1682 for (j=0; j<nz; j++){ 1683 /* *bj = aj[j]; bj++; */ 1684 bj[k++] = aj[j]; 1685 } 1686 /* diag[i] */ 1687 /* *bj = i; bj++; */ 1688 bj[k++] = i; 1689 bdiag[i] = bdiag[i+1] + nz + 1; 1690 } 1691 1692 fact->factortype = MAT_FACTOR_ILU; 1693 fact->info.factor_mallocs = 0; 1694 fact->info.fill_ratio_given = info->fill; 1695 fact->info.fill_ratio_needed = 1.0; 1696 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1697 1698 b = (Mat_SeqAIJ*)(fact)->data; 1699 b->row = isrow; 1700 b->col = iscol; 1701 b->icol = isicol; 1702 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1703 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1704 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1705 PetscFunctionReturn(0); 1706 } 1707 1708 #undef __FUNCT__ 1709 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1710 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1711 { 1712 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1713 IS isicol; 1714 PetscErrorCode ierr; 1715 const PetscInt *r,*ic; 1716 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1717 PetscInt *bi,*cols,nnz,*cols_lvl; 1718 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1719 PetscInt i,levels,diagonal_fill; 1720 PetscBool col_identity,row_identity; 1721 PetscReal f; 1722 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1723 PetscBT lnkbt; 1724 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1725 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1726 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1727 1728 PetscFunctionBegin; 1729 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1730 /* 1731 PetscBool olddatastruct=PETSC_FALSE; 1732 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1733 if(olddatastruct){ 1734 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1735 PetscFunctionReturn(0); 1736 } 1737 */ 1738 1739 levels = (PetscInt)info->levels; 1740 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1741 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1742 1743 if (!levels && row_identity && col_identity) { 1744 /* special case: ilu(0) with natural ordering */ 1745 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1746 if (a->inode.size) { 1747 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1748 } 1749 PetscFunctionReturn(0); 1750 } 1751 1752 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1753 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1754 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1755 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1756 1757 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1758 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1759 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1760 bi[0] = bdiag[0] = 0; 1761 1762 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1763 1764 /* create a linked list for storing column indices of the active row */ 1765 nlnk = n + 1; 1766 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1767 1768 /* initial FreeSpace size is f*(ai[n]+1) */ 1769 f = info->fill; 1770 diagonal_fill = (PetscInt)info->diagonal_fill; 1771 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1772 current_space = free_space; 1773 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1774 current_space_lvl = free_space_lvl; 1775 1776 for (i=0; i<n; i++) { 1777 nzi = 0; 1778 /* copy current row into linked list */ 1779 nnz = ai[r[i]+1] - ai[r[i]]; 1780 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1781 cols = aj + ai[r[i]]; 1782 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1783 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1784 nzi += nlnk; 1785 1786 /* make sure diagonal entry is included */ 1787 if (diagonal_fill && lnk[i] == -1) { 1788 fm = n; 1789 while (lnk[fm] < i) fm = lnk[fm]; 1790 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1791 lnk[fm] = i; 1792 lnk_lvl[i] = 0; 1793 nzi++; dcount++; 1794 } 1795 1796 /* add pivot rows into the active row */ 1797 nzbd = 0; 1798 prow = lnk[n]; 1799 while (prow < i) { 1800 nnz = bdiag[prow]; 1801 cols = bj_ptr[prow] + nnz + 1; 1802 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1803 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1804 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1805 nzi += nlnk; 1806 prow = lnk[prow]; 1807 nzbd++; 1808 } 1809 bdiag[i] = nzbd; 1810 bi[i+1] = bi[i] + nzi; 1811 1812 /* if free space is not available, make more free space */ 1813 if (current_space->local_remaining<nzi) { 1814 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1815 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1816 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1817 reallocs++; 1818 } 1819 1820 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1821 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1822 bj_ptr[i] = current_space->array; 1823 bjlvl_ptr[i] = current_space_lvl->array; 1824 1825 /* make sure the active row i has diagonal entry */ 1826 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1827 1828 current_space->array += nzi; 1829 current_space->local_used += nzi; 1830 current_space->local_remaining -= nzi; 1831 current_space_lvl->array += nzi; 1832 current_space_lvl->local_used += nzi; 1833 current_space_lvl->local_remaining -= nzi; 1834 } 1835 1836 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1837 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1838 1839 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1840 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1841 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1842 1843 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1844 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1845 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1846 1847 #if defined(PETSC_USE_INFO) 1848 { 1849 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1850 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1851 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1852 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1853 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1854 if (diagonal_fill) { 1855 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1856 } 1857 } 1858 #endif 1859 1860 /* put together the new matrix */ 1861 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1862 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1863 b = (Mat_SeqAIJ*)(fact)->data; 1864 b->free_a = PETSC_TRUE; 1865 b->free_ij = PETSC_TRUE; 1866 b->singlemalloc = PETSC_FALSE; 1867 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1868 b->j = bj; 1869 b->i = bi; 1870 b->diag = bdiag; 1871 b->ilen = 0; 1872 b->imax = 0; 1873 b->row = isrow; 1874 b->col = iscol; 1875 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1876 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1877 b->icol = isicol; 1878 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1879 /* In b structure: Free imax, ilen, old a, old j. 1880 Allocate bdiag, solve_work, new a, new j */ 1881 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1882 b->maxnz = b->nz = bdiag[0]+1; 1883 (fact)->info.factor_mallocs = reallocs; 1884 (fact)->info.fill_ratio_given = f; 1885 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1886 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1887 if (a->inode.size) { 1888 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1889 } 1890 PetscFunctionReturn(0); 1891 } 1892 1893 #undef __FUNCT__ 1894 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1895 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1896 { 1897 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1898 IS isicol; 1899 PetscErrorCode ierr; 1900 const PetscInt *r,*ic; 1901 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1902 PetscInt *bi,*cols,nnz,*cols_lvl; 1903 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1904 PetscInt i,levels,diagonal_fill; 1905 PetscBool col_identity,row_identity; 1906 PetscReal f; 1907 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1908 PetscBT lnkbt; 1909 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1910 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1911 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1912 PetscBool missing; 1913 1914 PetscFunctionBegin; 1915 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1916 f = info->fill; 1917 levels = (PetscInt)info->levels; 1918 diagonal_fill = (PetscInt)info->diagonal_fill; 1919 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1920 1921 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1922 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1923 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1924 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1925 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1926 if (a->inode.size) { 1927 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1928 } 1929 fact->factortype = MAT_FACTOR_ILU; 1930 (fact)->info.factor_mallocs = 0; 1931 (fact)->info.fill_ratio_given = info->fill; 1932 (fact)->info.fill_ratio_needed = 1.0; 1933 b = (Mat_SeqAIJ*)(fact)->data; 1934 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1935 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1936 b->row = isrow; 1937 b->col = iscol; 1938 b->icol = isicol; 1939 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1940 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1941 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1942 PetscFunctionReturn(0); 1943 } 1944 1945 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1946 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1947 1948 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1949 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1950 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1951 bi[0] = bdiag[0] = 0; 1952 1953 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1954 1955 /* create a linked list for storing column indices of the active row */ 1956 nlnk = n + 1; 1957 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1958 1959 /* initial FreeSpace size is f*(ai[n]+1) */ 1960 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1961 current_space = free_space; 1962 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1963 current_space_lvl = free_space_lvl; 1964 1965 for (i=0; i<n; i++) { 1966 nzi = 0; 1967 /* copy current row into linked list */ 1968 nnz = ai[r[i]+1] - ai[r[i]]; 1969 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1970 cols = aj + ai[r[i]]; 1971 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1972 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1973 nzi += nlnk; 1974 1975 /* make sure diagonal entry is included */ 1976 if (diagonal_fill && lnk[i] == -1) { 1977 fm = n; 1978 while (lnk[fm] < i) fm = lnk[fm]; 1979 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1980 lnk[fm] = i; 1981 lnk_lvl[i] = 0; 1982 nzi++; dcount++; 1983 } 1984 1985 /* add pivot rows into the active row */ 1986 nzbd = 0; 1987 prow = lnk[n]; 1988 while (prow < i) { 1989 nnz = bdiag[prow]; 1990 cols = bj_ptr[prow] + nnz + 1; 1991 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1992 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1993 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1994 nzi += nlnk; 1995 prow = lnk[prow]; 1996 nzbd++; 1997 } 1998 bdiag[i] = nzbd; 1999 bi[i+1] = bi[i] + nzi; 2000 2001 /* if free space is not available, make more free space */ 2002 if (current_space->local_remaining<nzi) { 2003 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2004 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2005 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2006 reallocs++; 2007 } 2008 2009 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2010 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2011 bj_ptr[i] = current_space->array; 2012 bjlvl_ptr[i] = current_space_lvl->array; 2013 2014 /* make sure the active row i has diagonal entry */ 2015 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2016 2017 current_space->array += nzi; 2018 current_space->local_used += nzi; 2019 current_space->local_remaining -= nzi; 2020 current_space_lvl->array += nzi; 2021 current_space_lvl->local_used += nzi; 2022 current_space_lvl->local_remaining -= nzi; 2023 } 2024 2025 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2026 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2027 2028 /* destroy list of free space and other temporary arrays */ 2029 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2030 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2031 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2032 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2033 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2034 2035 #if defined(PETSC_USE_INFO) 2036 { 2037 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2038 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2039 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2040 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2041 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2042 if (diagonal_fill) { 2043 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2044 } 2045 } 2046 #endif 2047 2048 /* put together the new matrix */ 2049 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2050 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2051 b = (Mat_SeqAIJ*)(fact)->data; 2052 b->free_a = PETSC_TRUE; 2053 b->free_ij = PETSC_TRUE; 2054 b->singlemalloc = PETSC_FALSE; 2055 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2056 b->j = bj; 2057 b->i = bi; 2058 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2059 b->diag = bdiag; 2060 b->ilen = 0; 2061 b->imax = 0; 2062 b->row = isrow; 2063 b->col = iscol; 2064 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2065 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2066 b->icol = isicol; 2067 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2068 /* In b structure: Free imax, ilen, old a, old j. 2069 Allocate bdiag, solve_work, new a, new j */ 2070 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2071 b->maxnz = b->nz = bi[n] ; 2072 (fact)->info.factor_mallocs = reallocs; 2073 (fact)->info.fill_ratio_given = f; 2074 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2075 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2076 if (a->inode.size) { 2077 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2084 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2085 { 2086 Mat C = B; 2087 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2088 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2089 IS ip=b->row,iip = b->icol; 2090 PetscErrorCode ierr; 2091 const PetscInt *rip,*riip; 2092 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2093 PetscInt *ai=a->i,*aj=a->j; 2094 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2095 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2096 PetscBool perm_identity; 2097 FactorShiftCtx sctx; 2098 PetscReal rs; 2099 MatScalar d,*v; 2100 2101 PetscFunctionBegin; 2102 /* MatPivotSetUp(): initialize shift context sctx */ 2103 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2104 2105 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2106 sctx.shift_top = info->zeropivot; 2107 for (i=0; i<mbs; i++) { 2108 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2109 d = (aa)[a->diag[i]]; 2110 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2111 v = aa+ai[i]; 2112 nz = ai[i+1] - ai[i]; 2113 for (j=0; j<nz; j++) 2114 rs += PetscAbsScalar(v[j]); 2115 if (rs>sctx.shift_top) sctx.shift_top = rs; 2116 } 2117 sctx.shift_top *= 1.1; 2118 sctx.nshift_max = 5; 2119 sctx.shift_lo = 0.; 2120 sctx.shift_hi = 1.; 2121 } 2122 2123 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2124 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2125 2126 /* allocate working arrays 2127 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2128 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2129 */ 2130 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2131 2132 do { 2133 sctx.newshift = PETSC_FALSE; 2134 2135 for (i=0; i<mbs; i++) c2r[i] = mbs; 2136 il[0] = 0; 2137 2138 for (k = 0; k<mbs; k++){ 2139 /* zero rtmp */ 2140 nz = bi[k+1] - bi[k]; 2141 bjtmp = bj + bi[k]; 2142 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2143 2144 /* load in initial unfactored row */ 2145 bval = ba + bi[k]; 2146 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2147 for (j = jmin; j < jmax; j++){ 2148 col = riip[aj[j]]; 2149 if (col >= k){ /* only take upper triangular entry */ 2150 rtmp[col] = aa[j]; 2151 *bval++ = 0.0; /* for in-place factorization */ 2152 } 2153 } 2154 /* shift the diagonal of the matrix: ZeropivotApply() */ 2155 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2156 2157 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2158 dk = rtmp[k]; 2159 i = c2r[k]; /* first row to be added to k_th row */ 2160 2161 while (i < k){ 2162 nexti = c2r[i]; /* next row to be added to k_th row */ 2163 2164 /* compute multiplier, update diag(k) and U(i,k) */ 2165 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2166 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2167 dk += uikdi*ba[ili]; /* update diag[k] */ 2168 ba[ili] = uikdi; /* -U(i,k) */ 2169 2170 /* add multiple of row i to k-th row */ 2171 jmin = ili + 1; jmax = bi[i+1]; 2172 if (jmin < jmax){ 2173 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2174 /* update il and c2r for row i */ 2175 il[i] = jmin; 2176 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2177 } 2178 i = nexti; 2179 } 2180 2181 /* copy data into U(k,:) */ 2182 rs = 0.0; 2183 jmin = bi[k]; jmax = bi[k+1]-1; 2184 if (jmin < jmax) { 2185 for (j=jmin; j<jmax; j++){ 2186 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2187 } 2188 /* add the k-th row into il and c2r */ 2189 il[k] = jmin; 2190 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2191 } 2192 2193 /* MatPivotCheck() */ 2194 sctx.rs = rs; 2195 sctx.pv = dk; 2196 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2197 if(sctx.newshift) break; 2198 dk = sctx.pv; 2199 2200 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2201 } 2202 } while (sctx.newshift); 2203 2204 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2205 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2206 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2207 2208 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2209 if (perm_identity){ 2210 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2211 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2212 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2213 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2214 } else { 2215 B->ops->solve = MatSolve_SeqSBAIJ_1; 2216 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2217 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2218 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2219 } 2220 2221 C->assembled = PETSC_TRUE; 2222 C->preallocated = PETSC_TRUE; 2223 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2224 2225 /* MatPivotView() */ 2226 if (sctx.nshift){ 2227 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2228 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2229 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2230 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2231 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2232 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2233 } 2234 } 2235 PetscFunctionReturn(0); 2236 } 2237 2238 #undef __FUNCT__ 2239 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2240 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2241 { 2242 Mat C = B; 2243 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2244 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2245 IS ip=b->row,iip = b->icol; 2246 PetscErrorCode ierr; 2247 const PetscInt *rip,*riip; 2248 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2249 PetscInt *ai=a->i,*aj=a->j; 2250 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2251 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2252 PetscBool perm_identity; 2253 FactorShiftCtx sctx; 2254 PetscReal rs; 2255 MatScalar d,*v; 2256 2257 PetscFunctionBegin; 2258 /* MatPivotSetUp(): initialize shift context sctx */ 2259 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2260 2261 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2262 sctx.shift_top = info->zeropivot; 2263 for (i=0; i<mbs; i++) { 2264 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2265 d = (aa)[a->diag[i]]; 2266 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2267 v = aa+ai[i]; 2268 nz = ai[i+1] - ai[i]; 2269 for (j=0; j<nz; j++) 2270 rs += PetscAbsScalar(v[j]); 2271 if (rs>sctx.shift_top) sctx.shift_top = rs; 2272 } 2273 sctx.shift_top *= 1.1; 2274 sctx.nshift_max = 5; 2275 sctx.shift_lo = 0.; 2276 sctx.shift_hi = 1.; 2277 } 2278 2279 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2280 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2281 2282 /* initialization */ 2283 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2284 2285 do { 2286 sctx.newshift = PETSC_FALSE; 2287 2288 for (i=0; i<mbs; i++) jl[i] = mbs; 2289 il[0] = 0; 2290 2291 for (k = 0; k<mbs; k++){ 2292 /* zero rtmp */ 2293 nz = bi[k+1] - bi[k]; 2294 bjtmp = bj + bi[k]; 2295 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2296 2297 bval = ba + bi[k]; 2298 /* initialize k-th row by the perm[k]-th row of A */ 2299 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2300 for (j = jmin; j < jmax; j++){ 2301 col = riip[aj[j]]; 2302 if (col >= k){ /* only take upper triangular entry */ 2303 rtmp[col] = aa[j]; 2304 *bval++ = 0.0; /* for in-place factorization */ 2305 } 2306 } 2307 /* shift the diagonal of the matrix */ 2308 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2309 2310 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2311 dk = rtmp[k]; 2312 i = jl[k]; /* first row to be added to k_th row */ 2313 2314 while (i < k){ 2315 nexti = jl[i]; /* next row to be added to k_th row */ 2316 2317 /* compute multiplier, update diag(k) and U(i,k) */ 2318 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2319 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2320 dk += uikdi*ba[ili]; 2321 ba[ili] = uikdi; /* -U(i,k) */ 2322 2323 /* add multiple of row i to k-th row */ 2324 jmin = ili + 1; jmax = bi[i+1]; 2325 if (jmin < jmax){ 2326 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2327 /* update il and jl for row i */ 2328 il[i] = jmin; 2329 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2330 } 2331 i = nexti; 2332 } 2333 2334 /* shift the diagonals when zero pivot is detected */ 2335 /* compute rs=sum of abs(off-diagonal) */ 2336 rs = 0.0; 2337 jmin = bi[k]+1; 2338 nz = bi[k+1] - jmin; 2339 bcol = bj + jmin; 2340 for (j=0; j<nz; j++) { 2341 rs += PetscAbsScalar(rtmp[bcol[j]]); 2342 } 2343 2344 sctx.rs = rs; 2345 sctx.pv = dk; 2346 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2347 if (sctx.newshift) break; 2348 dk = sctx.pv; 2349 2350 /* copy data into U(k,:) */ 2351 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2352 jmin = bi[k]+1; jmax = bi[k+1]; 2353 if (jmin < jmax) { 2354 for (j=jmin; j<jmax; j++){ 2355 col = bj[j]; ba[j] = rtmp[col]; 2356 } 2357 /* add the k-th row into il and jl */ 2358 il[k] = jmin; 2359 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2360 } 2361 } 2362 } while (sctx.newshift); 2363 2364 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2365 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2366 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2367 2368 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2369 if (perm_identity){ 2370 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2371 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2372 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2373 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2374 } else { 2375 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2376 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2377 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2378 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2379 } 2380 2381 C->assembled = PETSC_TRUE; 2382 C->preallocated = PETSC_TRUE; 2383 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2384 if (sctx.nshift){ 2385 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2386 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2387 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2388 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2389 } 2390 } 2391 PetscFunctionReturn(0); 2392 } 2393 2394 /* 2395 icc() under revised new data structure. 2396 Factored arrays bj and ba are stored as 2397 U(0,:),...,U(i,:),U(n-1,:) 2398 2399 ui=fact->i is an array of size n+1, in which 2400 ui+ 2401 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2402 ui[n]: points to U(n-1,n-1)+1 2403 2404 udiag=fact->diag is an array of size n,in which 2405 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2406 2407 U(i,:) contains udiag[i] as its last entry, i.e., 2408 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2409 */ 2410 2411 #undef __FUNCT__ 2412 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2413 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2414 { 2415 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2416 Mat_SeqSBAIJ *b; 2417 PetscErrorCode ierr; 2418 PetscBool perm_identity,missing; 2419 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2420 const PetscInt *rip,*riip; 2421 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2422 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2423 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2424 PetscReal fill=info->fill,levels=info->levels; 2425 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2426 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2427 PetscBT lnkbt; 2428 IS iperm; 2429 2430 PetscFunctionBegin; 2431 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2432 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2433 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2434 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2435 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2436 2437 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2438 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2439 ui[0] = 0; 2440 2441 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2442 if (!levels && perm_identity) { 2443 for (i=0; i<am; i++) { 2444 ncols = ai[i+1] - a->diag[i]; 2445 ui[i+1] = ui[i] + ncols; 2446 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2447 } 2448 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2449 cols = uj; 2450 for (i=0; i<am; i++) { 2451 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2452 ncols = ai[i+1] - a->diag[i] -1; 2453 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2454 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2455 } 2456 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2457 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2458 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2459 2460 /* initialization */ 2461 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2462 2463 /* jl: linked list for storing indices of the pivot rows 2464 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2465 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2466 for (i=0; i<am; i++){ 2467 jl[i] = am; il[i] = 0; 2468 } 2469 2470 /* create and initialize a linked list for storing column indices of the active row k */ 2471 nlnk = am + 1; 2472 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2473 2474 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2475 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2476 current_space = free_space; 2477 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2478 current_space_lvl = free_space_lvl; 2479 2480 for (k=0; k<am; k++){ /* for each active row k */ 2481 /* initialize lnk by the column indices of row rip[k] of A */ 2482 nzk = 0; 2483 ncols = ai[rip[k]+1] - ai[rip[k]]; 2484 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2485 ncols_upper = 0; 2486 for (j=0; j<ncols; j++){ 2487 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2488 if (riip[i] >= k){ /* only take upper triangular entry */ 2489 ajtmp[ncols_upper] = i; 2490 ncols_upper++; 2491 } 2492 } 2493 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2494 nzk += nlnk; 2495 2496 /* update lnk by computing fill-in for each pivot row to be merged in */ 2497 prow = jl[k]; /* 1st pivot row */ 2498 2499 while (prow < k){ 2500 nextprow = jl[prow]; 2501 2502 /* merge prow into k-th row */ 2503 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2504 jmax = ui[prow+1]; 2505 ncols = jmax-jmin; 2506 i = jmin - ui[prow]; 2507 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2508 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2509 j = *(uj - 1); 2510 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2511 nzk += nlnk; 2512 2513 /* update il and jl for prow */ 2514 if (jmin < jmax){ 2515 il[prow] = jmin; 2516 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2517 } 2518 prow = nextprow; 2519 } 2520 2521 /* if free space is not available, make more free space */ 2522 if (current_space->local_remaining<nzk) { 2523 i = am - k + 1; /* num of unfactored rows */ 2524 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2525 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2526 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2527 reallocs++; 2528 } 2529 2530 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2531 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2532 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2533 2534 /* add the k-th row into il and jl */ 2535 if (nzk > 1){ 2536 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2537 jl[k] = jl[i]; jl[i] = k; 2538 il[k] = ui[k] + 1; 2539 } 2540 uj_ptr[k] = current_space->array; 2541 uj_lvl_ptr[k] = current_space_lvl->array; 2542 2543 current_space->array += nzk; 2544 current_space->local_used += nzk; 2545 current_space->local_remaining -= nzk; 2546 2547 current_space_lvl->array += nzk; 2548 current_space_lvl->local_used += nzk; 2549 current_space_lvl->local_remaining -= nzk; 2550 2551 ui[k+1] = ui[k] + nzk; 2552 } 2553 2554 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2555 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2556 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2557 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2558 2559 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2560 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2561 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2562 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2563 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2564 2565 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2566 2567 /* put together the new matrix in MATSEQSBAIJ format */ 2568 b = (Mat_SeqSBAIJ*)(fact)->data; 2569 b->singlemalloc = PETSC_FALSE; 2570 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2571 b->j = uj; 2572 b->i = ui; 2573 b->diag = udiag; 2574 b->free_diag = PETSC_TRUE; 2575 b->ilen = 0; 2576 b->imax = 0; 2577 b->row = perm; 2578 b->col = perm; 2579 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2580 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2581 b->icol = iperm; 2582 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2583 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2584 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2585 b->maxnz = b->nz = ui[am]; 2586 b->free_a = PETSC_TRUE; 2587 b->free_ij = PETSC_TRUE; 2588 2589 fact->info.factor_mallocs = reallocs; 2590 fact->info.fill_ratio_given = fill; 2591 if (ai[am] != 0) { 2592 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2593 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2594 } else { 2595 fact->info.fill_ratio_needed = 0.0; 2596 } 2597 #if defined(PETSC_USE_INFO) 2598 if (ai[am] != 0) { 2599 PetscReal af = fact->info.fill_ratio_needed; 2600 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2601 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2602 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2603 } else { 2604 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2605 } 2606 #endif 2607 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2608 PetscFunctionReturn(0); 2609 } 2610 2611 #undef __FUNCT__ 2612 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2613 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2614 { 2615 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2616 Mat_SeqSBAIJ *b; 2617 PetscErrorCode ierr; 2618 PetscBool perm_identity,missing; 2619 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2620 const PetscInt *rip,*riip; 2621 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2622 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2623 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2624 PetscReal fill=info->fill,levels=info->levels; 2625 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2626 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2627 PetscBT lnkbt; 2628 IS iperm; 2629 2630 PetscFunctionBegin; 2631 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2632 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2633 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2634 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2635 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2636 2637 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2638 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2639 ui[0] = 0; 2640 2641 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2642 if (!levels && perm_identity) { 2643 2644 for (i=0; i<am; i++) { 2645 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2646 udiag[i] = ui[i]; 2647 } 2648 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2649 cols = uj; 2650 for (i=0; i<am; i++) { 2651 aj = a->j + a->diag[i]; 2652 ncols = ui[i+1] - ui[i]; 2653 for (j=0; j<ncols; j++) *cols++ = *aj++; 2654 } 2655 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2656 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2657 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2658 2659 /* initialization */ 2660 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2661 2662 /* jl: linked list for storing indices of the pivot rows 2663 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2664 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2665 for (i=0; i<am; i++){ 2666 jl[i] = am; il[i] = 0; 2667 } 2668 2669 /* create and initialize a linked list for storing column indices of the active row k */ 2670 nlnk = am + 1; 2671 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2672 2673 /* initial FreeSpace size is fill*(ai[am]+1) */ 2674 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2675 current_space = free_space; 2676 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2677 current_space_lvl = free_space_lvl; 2678 2679 for (k=0; k<am; k++){ /* for each active row k */ 2680 /* initialize lnk by the column indices of row rip[k] of A */ 2681 nzk = 0; 2682 ncols = ai[rip[k]+1] - ai[rip[k]]; 2683 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2684 ncols_upper = 0; 2685 for (j=0; j<ncols; j++){ 2686 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2687 if (riip[i] >= k){ /* only take upper triangular entry */ 2688 ajtmp[ncols_upper] = i; 2689 ncols_upper++; 2690 } 2691 } 2692 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2693 nzk += nlnk; 2694 2695 /* update lnk by computing fill-in for each pivot row to be merged in */ 2696 prow = jl[k]; /* 1st pivot row */ 2697 2698 while (prow < k){ 2699 nextprow = jl[prow]; 2700 2701 /* merge prow into k-th row */ 2702 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2703 jmax = ui[prow+1]; 2704 ncols = jmax-jmin; 2705 i = jmin - ui[prow]; 2706 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2707 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2708 j = *(uj - 1); 2709 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2710 nzk += nlnk; 2711 2712 /* update il and jl for prow */ 2713 if (jmin < jmax){ 2714 il[prow] = jmin; 2715 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2716 } 2717 prow = nextprow; 2718 } 2719 2720 /* if free space is not available, make more free space */ 2721 if (current_space->local_remaining<nzk) { 2722 i = am - k + 1; /* num of unfactored rows */ 2723 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2724 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2725 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2726 reallocs++; 2727 } 2728 2729 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2730 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2731 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2732 2733 /* add the k-th row into il and jl */ 2734 if (nzk > 1){ 2735 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2736 jl[k] = jl[i]; jl[i] = k; 2737 il[k] = ui[k] + 1; 2738 } 2739 uj_ptr[k] = current_space->array; 2740 uj_lvl_ptr[k] = current_space_lvl->array; 2741 2742 current_space->array += nzk; 2743 current_space->local_used += nzk; 2744 current_space->local_remaining -= nzk; 2745 2746 current_space_lvl->array += nzk; 2747 current_space_lvl->local_used += nzk; 2748 current_space_lvl->local_remaining -= nzk; 2749 2750 ui[k+1] = ui[k] + nzk; 2751 } 2752 2753 #if defined(PETSC_USE_INFO) 2754 if (ai[am] != 0) { 2755 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2756 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2757 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2758 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2759 } else { 2760 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2761 } 2762 #endif 2763 2764 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2765 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2766 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2767 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2768 2769 /* destroy list of free space and other temporary array(s) */ 2770 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2771 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2772 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2773 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2774 2775 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2776 2777 /* put together the new matrix in MATSEQSBAIJ format */ 2778 2779 b = (Mat_SeqSBAIJ*)fact->data; 2780 b->singlemalloc = PETSC_FALSE; 2781 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2782 b->j = uj; 2783 b->i = ui; 2784 b->diag = udiag; 2785 b->free_diag = PETSC_TRUE; 2786 b->ilen = 0; 2787 b->imax = 0; 2788 b->row = perm; 2789 b->col = perm; 2790 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2791 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2792 b->icol = iperm; 2793 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2794 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2795 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2796 b->maxnz = b->nz = ui[am]; 2797 b->free_a = PETSC_TRUE; 2798 b->free_ij = PETSC_TRUE; 2799 2800 fact->info.factor_mallocs = reallocs; 2801 fact->info.fill_ratio_given = fill; 2802 if (ai[am] != 0) { 2803 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2804 } else { 2805 fact->info.fill_ratio_needed = 0.0; 2806 } 2807 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2808 PetscFunctionReturn(0); 2809 } 2810 2811 #undef __FUNCT__ 2812 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2813 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2814 { 2815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2816 Mat_SeqSBAIJ *b; 2817 PetscErrorCode ierr; 2818 PetscBool perm_identity; 2819 PetscReal fill = info->fill; 2820 const PetscInt *rip,*riip; 2821 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2822 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2823 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2824 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2825 PetscBT lnkbt; 2826 IS iperm; 2827 2828 PetscFunctionBegin; 2829 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2830 /* check whether perm is the identity mapping */ 2831 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2832 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2833 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2834 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2835 2836 /* initialization */ 2837 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2838 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2839 ui[0] = 0; 2840 2841 /* jl: linked list for storing indices of the pivot rows 2842 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2843 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2844 for (i=0; i<am; i++){ 2845 jl[i] = am; il[i] = 0; 2846 } 2847 2848 /* create and initialize a linked list for storing column indices of the active row k */ 2849 nlnk = am + 1; 2850 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2851 2852 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2853 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2854 current_space = free_space; 2855 2856 for (k=0; k<am; k++){ /* for each active row k */ 2857 /* initialize lnk by the column indices of row rip[k] of A */ 2858 nzk = 0; 2859 ncols = ai[rip[k]+1] - ai[rip[k]]; 2860 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2861 ncols_upper = 0; 2862 for (j=0; j<ncols; j++){ 2863 i = riip[*(aj + ai[rip[k]] + j)]; 2864 if (i >= k){ /* only take upper triangular entry */ 2865 cols[ncols_upper] = i; 2866 ncols_upper++; 2867 } 2868 } 2869 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2870 nzk += nlnk; 2871 2872 /* update lnk by computing fill-in for each pivot row to be merged in */ 2873 prow = jl[k]; /* 1st pivot row */ 2874 2875 while (prow < k){ 2876 nextprow = jl[prow]; 2877 /* merge prow into k-th row */ 2878 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2879 jmax = ui[prow+1]; 2880 ncols = jmax-jmin; 2881 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2882 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2883 nzk += nlnk; 2884 2885 /* update il and jl for prow */ 2886 if (jmin < jmax){ 2887 il[prow] = jmin; 2888 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2889 } 2890 prow = nextprow; 2891 } 2892 2893 /* if free space is not available, make more free space */ 2894 if (current_space->local_remaining<nzk) { 2895 i = am - k + 1; /* num of unfactored rows */ 2896 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2897 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2898 reallocs++; 2899 } 2900 2901 /* copy data into free space, then initialize lnk */ 2902 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2903 2904 /* add the k-th row into il and jl */ 2905 if (nzk > 1){ 2906 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2907 jl[k] = jl[i]; jl[i] = k; 2908 il[k] = ui[k] + 1; 2909 } 2910 ui_ptr[k] = current_space->array; 2911 current_space->array += nzk; 2912 current_space->local_used += nzk; 2913 current_space->local_remaining -= nzk; 2914 2915 ui[k+1] = ui[k] + nzk; 2916 } 2917 2918 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2919 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2920 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2921 2922 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2923 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2924 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2925 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2926 2927 /* put together the new matrix in MATSEQSBAIJ format */ 2928 2929 b = (Mat_SeqSBAIJ*)fact->data; 2930 b->singlemalloc = PETSC_FALSE; 2931 b->free_a = PETSC_TRUE; 2932 b->free_ij = PETSC_TRUE; 2933 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2934 b->j = uj; 2935 b->i = ui; 2936 b->diag = udiag; 2937 b->free_diag = PETSC_TRUE; 2938 b->ilen = 0; 2939 b->imax = 0; 2940 b->row = perm; 2941 b->col = perm; 2942 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2943 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2944 b->icol = iperm; 2945 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2946 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2947 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2948 b->maxnz = b->nz = ui[am]; 2949 2950 fact->info.factor_mallocs = reallocs; 2951 fact->info.fill_ratio_given = fill; 2952 if (ai[am] != 0) { 2953 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2954 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2955 } else { 2956 fact->info.fill_ratio_needed = 0.0; 2957 } 2958 #if defined(PETSC_USE_INFO) 2959 if (ai[am] != 0) { 2960 PetscReal af = fact->info.fill_ratio_needed; 2961 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2962 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2963 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2964 } else { 2965 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2966 } 2967 #endif 2968 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2969 PetscFunctionReturn(0); 2970 } 2971 2972 #undef __FUNCT__ 2973 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2974 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2975 { 2976 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2977 Mat_SeqSBAIJ *b; 2978 PetscErrorCode ierr; 2979 PetscBool perm_identity; 2980 PetscReal fill = info->fill; 2981 const PetscInt *rip,*riip; 2982 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2983 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2984 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2985 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2986 PetscBT lnkbt; 2987 IS iperm; 2988 2989 PetscFunctionBegin; 2990 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2991 /* check whether perm is the identity mapping */ 2992 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2993 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2994 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2995 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2996 2997 /* initialization */ 2998 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2999 ui[0] = 0; 3000 3001 /* jl: linked list for storing indices of the pivot rows 3002 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3003 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3004 for (i=0; i<am; i++){ 3005 jl[i] = am; il[i] = 0; 3006 } 3007 3008 /* create and initialize a linked list for storing column indices of the active row k */ 3009 nlnk = am + 1; 3010 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3011 3012 /* initial FreeSpace size is fill*(ai[am]+1) */ 3013 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3014 current_space = free_space; 3015 3016 for (k=0; k<am; k++){ /* for each active row k */ 3017 /* initialize lnk by the column indices of row rip[k] of A */ 3018 nzk = 0; 3019 ncols = ai[rip[k]+1] - ai[rip[k]]; 3020 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3021 ncols_upper = 0; 3022 for (j=0; j<ncols; j++){ 3023 i = riip[*(aj + ai[rip[k]] + j)]; 3024 if (i >= k){ /* only take upper triangular entry */ 3025 cols[ncols_upper] = i; 3026 ncols_upper++; 3027 } 3028 } 3029 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3030 nzk += nlnk; 3031 3032 /* update lnk by computing fill-in for each pivot row to be merged in */ 3033 prow = jl[k]; /* 1st pivot row */ 3034 3035 while (prow < k){ 3036 nextprow = jl[prow]; 3037 /* merge prow into k-th row */ 3038 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3039 jmax = ui[prow+1]; 3040 ncols = jmax-jmin; 3041 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3042 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3043 nzk += nlnk; 3044 3045 /* update il and jl for prow */ 3046 if (jmin < jmax){ 3047 il[prow] = jmin; 3048 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3049 } 3050 prow = nextprow; 3051 } 3052 3053 /* if free space is not available, make more free space */ 3054 if (current_space->local_remaining<nzk) { 3055 i = am - k + 1; /* num of unfactored rows */ 3056 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3057 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3058 reallocs++; 3059 } 3060 3061 /* copy data into free space, then initialize lnk */ 3062 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3063 3064 /* add the k-th row into il and jl */ 3065 if (nzk-1 > 0){ 3066 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3067 jl[k] = jl[i]; jl[i] = k; 3068 il[k] = ui[k] + 1; 3069 } 3070 ui_ptr[k] = current_space->array; 3071 current_space->array += nzk; 3072 current_space->local_used += nzk; 3073 current_space->local_remaining -= nzk; 3074 3075 ui[k+1] = ui[k] + nzk; 3076 } 3077 3078 #if defined(PETSC_USE_INFO) 3079 if (ai[am] != 0) { 3080 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3081 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3082 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3083 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3084 } else { 3085 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3086 } 3087 #endif 3088 3089 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3090 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3091 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3092 3093 /* destroy list of free space and other temporary array(s) */ 3094 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3095 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3096 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3097 3098 /* put together the new matrix in MATSEQSBAIJ format */ 3099 3100 b = (Mat_SeqSBAIJ*)fact->data; 3101 b->singlemalloc = PETSC_FALSE; 3102 b->free_a = PETSC_TRUE; 3103 b->free_ij = PETSC_TRUE; 3104 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3105 b->j = uj; 3106 b->i = ui; 3107 b->diag = 0; 3108 b->ilen = 0; 3109 b->imax = 0; 3110 b->row = perm; 3111 b->col = perm; 3112 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3113 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3114 b->icol = iperm; 3115 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3116 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3117 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3118 b->maxnz = b->nz = ui[am]; 3119 3120 fact->info.factor_mallocs = reallocs; 3121 fact->info.fill_ratio_given = fill; 3122 if (ai[am] != 0) { 3123 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3124 } else { 3125 fact->info.fill_ratio_needed = 0.0; 3126 } 3127 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3128 PetscFunctionReturn(0); 3129 } 3130 3131 #undef __FUNCT__ 3132 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3133 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3134 { 3135 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3136 PetscErrorCode ierr; 3137 PetscInt n = A->rmap->n; 3138 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3139 PetscScalar *x,sum; 3140 const PetscScalar *b; 3141 const MatScalar *aa = a->a,*v; 3142 PetscInt i,nz; 3143 3144 PetscFunctionBegin; 3145 if (!n) PetscFunctionReturn(0); 3146 3147 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3148 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3149 3150 /* forward solve the lower triangular */ 3151 x[0] = b[0]; 3152 v = aa; 3153 vi = aj; 3154 for (i=1; i<n; i++) { 3155 nz = ai[i+1] - ai[i]; 3156 sum = b[i]; 3157 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3158 v += nz; 3159 vi += nz; 3160 x[i] = sum; 3161 } 3162 3163 /* backward solve the upper triangular */ 3164 for (i=n-1; i>=0; i--){ 3165 v = aa + adiag[i+1] + 1; 3166 vi = aj + adiag[i+1] + 1; 3167 nz = adiag[i] - adiag[i+1]-1; 3168 sum = x[i]; 3169 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3170 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3171 } 3172 3173 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3174 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3175 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3176 PetscFunctionReturn(0); 3177 } 3178 3179 #undef __FUNCT__ 3180 #define __FUNCT__ "MatSolve_SeqAIJ" 3181 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3182 { 3183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3184 IS iscol = a->col,isrow = a->row; 3185 PetscErrorCode ierr; 3186 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3187 const PetscInt *rout,*cout,*r,*c; 3188 PetscScalar *x,*tmp,sum; 3189 const PetscScalar *b; 3190 const MatScalar *aa = a->a,*v; 3191 3192 PetscFunctionBegin; 3193 if (!n) PetscFunctionReturn(0); 3194 3195 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3196 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3197 tmp = a->solve_work; 3198 3199 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3200 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3201 3202 /* forward solve the lower triangular */ 3203 tmp[0] = b[r[0]]; 3204 v = aa; 3205 vi = aj; 3206 for (i=1; i<n; i++) { 3207 nz = ai[i+1] - ai[i]; 3208 sum = b[r[i]]; 3209 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3210 tmp[i] = sum; 3211 v += nz; vi += nz; 3212 } 3213 3214 /* backward solve the upper triangular */ 3215 for (i=n-1; i>=0; i--){ 3216 v = aa + adiag[i+1]+1; 3217 vi = aj + adiag[i+1]+1; 3218 nz = adiag[i]-adiag[i+1]-1; 3219 sum = tmp[i]; 3220 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3221 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3222 } 3223 3224 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3225 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3226 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3227 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3228 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3229 PetscFunctionReturn(0); 3230 } 3231 3232 #undef __FUNCT__ 3233 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3234 /* 3235 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3236 */ 3237 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3238 { 3239 Mat B = *fact; 3240 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3241 IS isicol; 3242 PetscErrorCode ierr; 3243 const PetscInt *r,*ic; 3244 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3245 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3246 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3247 PetscInt nlnk,*lnk; 3248 PetscBT lnkbt; 3249 PetscBool row_identity,icol_identity; 3250 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3251 const PetscInt *ics; 3252 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3253 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3254 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3255 PetscBool missing; 3256 3257 PetscFunctionBegin; 3258 3259 if (dt == PETSC_DEFAULT) dt = 0.005; 3260 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3261 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3262 3263 /* ------- symbolic factorization, can be reused ---------*/ 3264 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3265 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3266 adiag=a->diag; 3267 3268 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3269 3270 /* bdiag is location of diagonal in factor */ 3271 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3272 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3273 3274 /* allocate row pointers bi */ 3275 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3276 3277 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3278 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3279 nnz_max = ai[n]+2*n*dtcount+2; 3280 3281 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3282 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3283 3284 /* put together the new matrix */ 3285 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3286 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3287 b = (Mat_SeqAIJ*)B->data; 3288 b->free_a = PETSC_TRUE; 3289 b->free_ij = PETSC_TRUE; 3290 b->singlemalloc = PETSC_FALSE; 3291 b->a = ba; 3292 b->j = bj; 3293 b->i = bi; 3294 b->diag = bdiag; 3295 b->ilen = 0; 3296 b->imax = 0; 3297 b->row = isrow; 3298 b->col = iscol; 3299 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3300 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3301 b->icol = isicol; 3302 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3303 3304 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3305 b->maxnz = nnz_max; 3306 3307 B->factortype = MAT_FACTOR_ILUDT; 3308 B->info.factor_mallocs = 0; 3309 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3310 CHKMEMQ; 3311 /* ------- end of symbolic factorization ---------*/ 3312 3313 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3314 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3315 ics = ic; 3316 3317 /* linked list for storing column indices of the active row */ 3318 nlnk = n + 1; 3319 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3320 3321 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3322 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3323 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3324 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3325 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3326 3327 bi[0] = 0; 3328 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3329 bdiag_rev[n] = bdiag[0]; 3330 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3331 for (i=0; i<n; i++) { 3332 /* copy initial fill into linked list */ 3333 nzi = 0; /* nonzeros for active row i */ 3334 nzi = ai[r[i]+1] - ai[r[i]]; 3335 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3336 nzi_al = adiag[r[i]] - ai[r[i]]; 3337 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3338 ajtmp = aj + ai[r[i]]; 3339 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3340 3341 /* load in initial (unfactored row) */ 3342 aatmp = a->a + ai[r[i]]; 3343 for (j=0; j<nzi; j++) { 3344 rtmp[ics[*ajtmp++]] = *aatmp++; 3345 } 3346 3347 /* add pivot rows into linked list */ 3348 row = lnk[n]; 3349 while (row < i ) { 3350 nzi_bl = bi[row+1] - bi[row] + 1; 3351 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3352 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3353 nzi += nlnk; 3354 row = lnk[row]; 3355 } 3356 3357 /* copy data from lnk into jtmp, then initialize lnk */ 3358 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3359 3360 /* numerical factorization */ 3361 bjtmp = jtmp; 3362 row = *bjtmp++; /* 1st pivot row */ 3363 while ( row < i ) { 3364 pc = rtmp + row; 3365 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3366 multiplier = (*pc) * (*pv); 3367 *pc = multiplier; 3368 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3369 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3370 pv = ba + bdiag[row+1] + 1; 3371 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3372 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3373 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3374 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3375 } 3376 row = *bjtmp++; 3377 } 3378 3379 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3380 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3381 nzi_bl = 0; j = 0; 3382 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3383 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3384 nzi_bl++; j++; 3385 } 3386 nzi_bu = nzi - nzi_bl -1; 3387 while (j < nzi){ 3388 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3389 j++; 3390 } 3391 3392 bjtmp = bj + bi[i]; 3393 batmp = ba + bi[i]; 3394 /* apply level dropping rule to L part */ 3395 ncut = nzi_al + dtcount; 3396 if (ncut < nzi_bl){ 3397 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3398 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3399 } else { 3400 ncut = nzi_bl; 3401 } 3402 for (j=0; j<ncut; j++){ 3403 bjtmp[j] = jtmp[j]; 3404 batmp[j] = vtmp[j]; 3405 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3406 } 3407 bi[i+1] = bi[i] + ncut; 3408 nzi = ncut + 1; 3409 3410 /* apply level dropping rule to U part */ 3411 ncut = nzi_au + dtcount; 3412 if (ncut < nzi_bu){ 3413 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3414 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3415 } else { 3416 ncut = nzi_bu; 3417 } 3418 nzi += ncut; 3419 3420 /* mark bdiagonal */ 3421 bdiag[i+1] = bdiag[i] - (ncut + 1); 3422 bdiag_rev[n-i-1] = bdiag[i+1]; 3423 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3424 bjtmp = bj + bdiag[i]; 3425 batmp = ba + bdiag[i]; 3426 *bjtmp = i; 3427 *batmp = diag_tmp; /* rtmp[i]; */ 3428 if (*batmp == 0.0) { 3429 *batmp = dt+shift; 3430 /* printf(" row %d add shift %g\n",i,shift); */ 3431 } 3432 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3433 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3434 3435 bjtmp = bj + bdiag[i+1]+1; 3436 batmp = ba + bdiag[i+1]+1; 3437 for (k=0; k<ncut; k++){ 3438 bjtmp[k] = jtmp[nzi_bl+1+k]; 3439 batmp[k] = vtmp[nzi_bl+1+k]; 3440 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3441 } 3442 /* printf("\n"); */ 3443 3444 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3445 /* 3446 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3447 printf(" ----------------------------\n"); 3448 */ 3449 } /* for (i=0; i<n; i++) */ 3450 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3451 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3452 3453 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3454 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3455 3456 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3457 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3458 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3459 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3460 3461 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3462 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3463 3464 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3465 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3466 if (row_identity && icol_identity) { 3467 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3468 } else { 3469 B->ops->solve = MatSolve_SeqAIJ; 3470 } 3471 3472 B->ops->solveadd = 0; 3473 B->ops->solvetranspose = 0; 3474 B->ops->solvetransposeadd = 0; 3475 B->ops->matsolve = 0; 3476 B->assembled = PETSC_TRUE; 3477 B->preallocated = PETSC_TRUE; 3478 PetscFunctionReturn(0); 3479 } 3480 3481 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3482 #undef __FUNCT__ 3483 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3484 /* 3485 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3486 */ 3487 3488 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3489 { 3490 PetscErrorCode ierr; 3491 3492 PetscFunctionBegin; 3493 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3494 PetscFunctionReturn(0); 3495 } 3496 3497 /* 3498 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3499 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3500 */ 3501 #undef __FUNCT__ 3502 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3503 /* 3504 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3505 */ 3506 3507 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3508 { 3509 Mat C=fact; 3510 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3511 IS isrow = b->row,isicol = b->icol; 3512 PetscErrorCode ierr; 3513 const PetscInt *r,*ic,*ics; 3514 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3515 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3516 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3517 PetscReal dt=info->dt,shift=info->shiftamount; 3518 PetscBool row_identity, col_identity; 3519 3520 PetscFunctionBegin; 3521 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3522 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3523 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3524 ics = ic; 3525 3526 for (i=0; i<n; i++){ 3527 /* initialize rtmp array */ 3528 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3529 bjtmp = bj + bi[i]; 3530 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3531 rtmp[i] = 0.0; 3532 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3533 bjtmp = bj + bdiag[i+1] + 1; 3534 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3535 3536 /* load in initial unfactored row of A */ 3537 /* printf("row %d\n",i); */ 3538 nz = ai[r[i]+1] - ai[r[i]]; 3539 ajtmp = aj + ai[r[i]]; 3540 v = aa + ai[r[i]]; 3541 for (j=0; j<nz; j++) { 3542 rtmp[ics[*ajtmp++]] = v[j]; 3543 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3544 } 3545 /* printf("\n"); */ 3546 3547 /* numerical factorization */ 3548 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3549 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3550 k = 0; 3551 while (k < nzl){ 3552 row = *bjtmp++; 3553 /* printf(" prow %d\n",row); */ 3554 pc = rtmp + row; 3555 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3556 multiplier = (*pc) * (*pv); 3557 *pc = multiplier; 3558 if (PetscAbsScalar(multiplier) > dt){ 3559 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3560 pv = b->a + bdiag[row+1] + 1; 3561 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3562 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3563 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3564 } 3565 k++; 3566 } 3567 3568 /* finished row so stick it into b->a */ 3569 /* L-part */ 3570 pv = b->a + bi[i] ; 3571 pj = bj + bi[i] ; 3572 nzl = bi[i+1] - bi[i]; 3573 for (j=0; j<nzl; j++) { 3574 pv[j] = rtmp[pj[j]]; 3575 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3576 } 3577 3578 /* diagonal: invert diagonal entries for simplier triangular solves */ 3579 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3580 b->a[bdiag[i]] = 1.0/rtmp[i]; 3581 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3582 3583 /* U-part */ 3584 pv = b->a + bdiag[i+1] + 1; 3585 pj = bj + bdiag[i+1] + 1; 3586 nzu = bdiag[i] - bdiag[i+1] - 1; 3587 for (j=0; j<nzu; j++) { 3588 pv[j] = rtmp[pj[j]]; 3589 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3590 } 3591 /* printf("\n"); */ 3592 } 3593 3594 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3595 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3596 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3597 3598 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3599 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3600 if (row_identity && col_identity) { 3601 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3602 } else { 3603 C->ops->solve = MatSolve_SeqAIJ; 3604 } 3605 C->ops->solveadd = 0; 3606 C->ops->solvetranspose = 0; 3607 C->ops->solvetransposeadd = 0; 3608 C->ops->matsolve = 0; 3609 C->assembled = PETSC_TRUE; 3610 C->preallocated = PETSC_TRUE; 3611 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614