1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 122 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 123 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 124 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 125 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 126 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 127 (*B)->factortype = ftype; 128 PetscFunctionReturn(0); 129 } 130 EXTERN_C_END 131 132 #undef __FUNCT__ 133 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 134 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 135 { 136 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 137 IS isicol; 138 PetscErrorCode ierr; 139 const PetscInt *r,*ic; 140 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 141 PetscInt *bi,*bj,*ajtmp; 142 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 143 PetscReal f; 144 PetscInt nlnk,*lnk,k,**bi_ptr; 145 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 146 PetscBT lnkbt; 147 148 PetscFunctionBegin; 149 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 150 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 151 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 152 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 153 154 /* get new row pointers */ 155 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 156 bi[0] = 0; 157 158 /* bdiag is location of diagonal in factor */ 159 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 160 bdiag[0] = 0; 161 162 /* linked list for storing column indices of the active row */ 163 nlnk = n + 1; 164 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 165 166 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 167 168 /* initial FreeSpace size is f*(ai[n]+1) */ 169 f = info->fill; 170 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 171 current_space = free_space; 172 173 for (i=0; i<n; i++) { 174 /* copy previous fill into linked list */ 175 nzi = 0; 176 nnz = ai[r[i]+1] - ai[r[i]]; 177 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 178 ajtmp = aj + ai[r[i]]; 179 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 180 nzi += nlnk; 181 182 /* add pivot rows into linked list */ 183 row = lnk[n]; 184 while (row < i) { 185 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 186 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 187 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 188 nzi += nlnk; 189 row = lnk[row]; 190 } 191 bi[i+1] = bi[i] + nzi; 192 im[i] = nzi; 193 194 /* mark bdiag */ 195 nzbd = 0; 196 nnz = nzi; 197 k = lnk[n]; 198 while (nnz-- && k < i){ 199 nzbd++; 200 k = lnk[k]; 201 } 202 bdiag[i] = bi[i] + nzbd; 203 204 /* if free space is not available, make more free space */ 205 if (current_space->local_remaining<nzi) { 206 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 207 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 208 reallocs++; 209 } 210 211 /* copy data into free space, then initialize lnk */ 212 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 213 bi_ptr[i] = current_space->array; 214 current_space->array += nzi; 215 current_space->local_used += nzi; 216 current_space->local_remaining -= nzi; 217 } 218 #if defined(PETSC_USE_INFO) 219 if (ai[n] != 0) { 220 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 221 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 222 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 224 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 225 } else { 226 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 227 } 228 #endif 229 230 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 231 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 232 233 /* destroy list of free space and other temporary array(s) */ 234 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 235 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 236 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 237 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 238 239 /* put together the new matrix */ 240 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 241 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 242 b = (Mat_SeqAIJ*)(B)->data; 243 b->free_a = PETSC_TRUE; 244 b->free_ij = PETSC_TRUE; 245 b->singlemalloc = PETSC_FALSE; 246 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 247 b->j = bj; 248 b->i = bi; 249 b->diag = bdiag; 250 b->ilen = 0; 251 b->imax = 0; 252 b->row = isrow; 253 b->col = iscol; 254 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 255 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 256 b->icol = isicol; 257 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 258 259 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 260 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 261 b->maxnz = b->nz = bi[n] ; 262 263 (B)->factortype = MAT_FACTOR_LU; 264 (B)->info.factor_mallocs = reallocs; 265 (B)->info.fill_ratio_given = f; 266 267 if (ai[n]) { 268 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 269 } else { 270 (B)->info.fill_ratio_needed = 0.0; 271 } 272 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 273 if (a->inode.size) { 274 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 275 } 276 PetscFunctionReturn(0); 277 } 278 279 #undef __FUNCT__ 280 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 281 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 282 { 283 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 284 IS isicol; 285 PetscErrorCode ierr; 286 const PetscInt *r,*ic; 287 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 288 PetscInt *bi,*bj,*ajtmp; 289 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 290 PetscReal f; 291 PetscInt nlnk,*lnk,k,**bi_ptr; 292 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 293 PetscBT lnkbt; 294 295 PetscFunctionBegin; 296 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 297 /* 298 PetscBool olddatastruct=PETSC_FALSE; 299 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 300 if(olddatastruct){ 301 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 302 PetscFunctionReturn(0); 303 } 304 */ 305 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 306 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 307 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 308 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 309 310 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 311 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 312 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 313 bi[0] = bdiag[0] = 0; 314 315 /* linked list for storing column indices of the active row */ 316 nlnk = n + 1; 317 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 318 319 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 320 321 /* initial FreeSpace size is f*(ai[n]+1) */ 322 f = info->fill; 323 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 324 current_space = free_space; 325 326 for (i=0; i<n; i++) { 327 /* copy previous fill into linked list */ 328 nzi = 0; 329 nnz = ai[r[i]+1] - ai[r[i]]; 330 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 331 ajtmp = aj + ai[r[i]]; 332 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 333 nzi += nlnk; 334 335 /* add pivot rows into linked list */ 336 row = lnk[n]; 337 while (row < i){ 338 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 339 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 340 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 341 nzi += nlnk; 342 row = lnk[row]; 343 } 344 bi[i+1] = bi[i] + nzi; 345 im[i] = nzi; 346 347 /* mark bdiag */ 348 nzbd = 0; 349 nnz = nzi; 350 k = lnk[n]; 351 while (nnz-- && k < i){ 352 nzbd++; 353 k = lnk[k]; 354 } 355 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 356 357 /* if free space is not available, make more free space */ 358 if (current_space->local_remaining<nzi) { 359 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 360 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 361 reallocs++; 362 } 363 364 /* copy data into free space, then initialize lnk */ 365 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 366 bi_ptr[i] = current_space->array; 367 current_space->array += nzi; 368 current_space->local_used += nzi; 369 current_space->local_remaining -= nzi; 370 } 371 372 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 373 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 374 375 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 376 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 377 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 378 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 379 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 380 381 /* put together the new matrix */ 382 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 383 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 384 b = (Mat_SeqAIJ*)(B)->data; 385 b->free_a = PETSC_TRUE; 386 b->free_ij = PETSC_TRUE; 387 b->singlemalloc = PETSC_FALSE; 388 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 389 b->j = bj; 390 b->i = bi; 391 b->diag = bdiag; 392 b->ilen = 0; 393 b->imax = 0; 394 b->row = isrow; 395 b->col = iscol; 396 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 397 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 398 b->icol = isicol; 399 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 400 401 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 402 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 403 b->maxnz = b->nz = bdiag[0]+1; 404 B->factortype = MAT_FACTOR_LU; 405 B->info.factor_mallocs = reallocs; 406 B->info.fill_ratio_given = f; 407 408 if (ai[n]) { 409 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 410 } else { 411 B->info.fill_ratio_needed = 0.0; 412 } 413 #if defined(PETSC_USE_INFO) 414 if (ai[n] != 0) { 415 PetscReal af = B->info.fill_ratio_needed; 416 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 417 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 418 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 419 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 420 } else { 421 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 422 } 423 #endif 424 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 425 if (a->inode.size) { 426 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 427 } 428 PetscFunctionReturn(0); 429 } 430 431 /* 432 Trouble in factorization, should we dump the original matrix? 433 */ 434 #undef __FUNCT__ 435 #define __FUNCT__ "MatFactorDumpMatrix" 436 PetscErrorCode MatFactorDumpMatrix(Mat A) 437 { 438 PetscErrorCode ierr; 439 PetscBool flg = PETSC_FALSE; 440 441 PetscFunctionBegin; 442 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 443 if (flg) { 444 PetscViewer viewer; 445 char filename[PETSC_MAX_PATH_LEN]; 446 447 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 448 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 449 ierr = MatView(A,viewer);CHKERRQ(ierr); 450 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 451 } 452 PetscFunctionReturn(0); 453 } 454 455 #undef __FUNCT__ 456 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 457 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 458 { 459 Mat C=B; 460 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 461 IS isrow = b->row,isicol = b->icol; 462 PetscErrorCode ierr; 463 const PetscInt *r,*ic,*ics; 464 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 465 PetscInt i,j,k,nz,nzL,row,*pj; 466 const PetscInt *ajtmp,*bjtmp; 467 MatScalar *rtmp,*pc,multiplier,*pv; 468 const MatScalar *aa=a->a,*v; 469 PetscBool row_identity,col_identity; 470 FactorShiftCtx sctx; 471 const PetscInt *ddiag; 472 PetscReal rs; 473 MatScalar d; 474 475 PetscFunctionBegin; 476 /* MatPivotSetUp(): initialize shift context sctx */ 477 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 478 479 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 480 ddiag = a->diag; 481 sctx.shift_top = info->zeropivot; 482 for (i=0; i<n; i++) { 483 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 484 d = (aa)[ddiag[i]]; 485 rs = -PetscAbsScalar(d) - PetscRealPart(d); 486 v = aa+ai[i]; 487 nz = ai[i+1] - ai[i]; 488 for (j=0; j<nz; j++) 489 rs += PetscAbsScalar(v[j]); 490 if (rs>sctx.shift_top) sctx.shift_top = rs; 491 } 492 sctx.shift_top *= 1.1; 493 sctx.nshift_max = 5; 494 sctx.shift_lo = 0.; 495 sctx.shift_hi = 1.; 496 } 497 498 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 499 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 500 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 501 ics = ic; 502 503 do { 504 sctx.newshift = PETSC_FALSE; 505 for (i=0; i<n; i++){ 506 /* zero rtmp */ 507 /* L part */ 508 nz = bi[i+1] - bi[i]; 509 bjtmp = bj + bi[i]; 510 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 511 512 /* U part */ 513 nz = bdiag[i]-bdiag[i+1]; 514 bjtmp = bj + bdiag[i+1]+1; 515 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 516 517 /* load in initial (unfactored row) */ 518 nz = ai[r[i]+1] - ai[r[i]]; 519 ajtmp = aj + ai[r[i]]; 520 v = aa + ai[r[i]]; 521 for (j=0; j<nz; j++) { 522 rtmp[ics[ajtmp[j]]] = v[j]; 523 } 524 /* ZeropivotApply() */ 525 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 526 527 /* elimination */ 528 bjtmp = bj + bi[i]; 529 row = *bjtmp++; 530 nzL = bi[i+1] - bi[i]; 531 for(k=0; k < nzL;k++) { 532 pc = rtmp + row; 533 if (*pc != 0.0) { 534 pv = b->a + bdiag[row]; 535 multiplier = *pc * (*pv); 536 *pc = multiplier; 537 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 538 pv = b->a + bdiag[row+1]+1; 539 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 540 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 541 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 542 } 543 row = *bjtmp++; 544 } 545 546 /* finished row so stick it into b->a */ 547 rs = 0.0; 548 /* L part */ 549 pv = b->a + bi[i] ; 550 pj = b->j + bi[i] ; 551 nz = bi[i+1] - bi[i]; 552 for (j=0; j<nz; j++) { 553 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 554 } 555 556 /* U part */ 557 pv = b->a + bdiag[i+1]+1; 558 pj = b->j + bdiag[i+1]+1; 559 nz = bdiag[i] - bdiag[i+1]-1; 560 for (j=0; j<nz; j++) { 561 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 562 } 563 564 sctx.rs = rs; 565 sctx.pv = rtmp[i]; 566 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 567 if(sctx.newshift) break; /* break for-loop */ 568 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 569 570 /* Mark diagonal and invert diagonal for simplier triangular solves */ 571 pv = b->a + bdiag[i]; 572 *pv = 1.0/rtmp[i]; 573 574 } /* endof for (i=0; i<n; i++){ */ 575 576 /* MatPivotRefine() */ 577 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 578 /* 579 * if no shift in this attempt & shifting & started shifting & can refine, 580 * then try lower shift 581 */ 582 sctx.shift_hi = sctx.shift_fraction; 583 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 584 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 585 sctx.newshift = PETSC_TRUE; 586 sctx.nshift++; 587 } 588 } while (sctx.newshift); 589 590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 591 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 593 594 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 595 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 596 if (row_identity && col_identity) { 597 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 598 } else { 599 C->ops->solve = MatSolve_SeqAIJ; 600 } 601 C->ops->solveadd = MatSolveAdd_SeqAIJ; 602 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 603 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 604 C->ops->matsolve = MatMatSolve_SeqAIJ; 605 C->assembled = PETSC_TRUE; 606 C->preallocated = PETSC_TRUE; 607 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 608 609 /* MatShiftView(A,info,&sctx) */ 610 if (sctx.nshift){ 611 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 612 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 613 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 614 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 615 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 616 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 617 } 618 } 619 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 620 PetscFunctionReturn(0); 621 } 622 623 #undef __FUNCT__ 624 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 625 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 626 { 627 Mat C=B; 628 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 629 IS isrow = b->row,isicol = b->icol; 630 PetscErrorCode ierr; 631 const PetscInt *r,*ic,*ics; 632 PetscInt nz,row,i,j,n=A->rmap->n,diag; 633 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 634 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 635 MatScalar *pv,*rtmp,*pc,multiplier,d; 636 const MatScalar *v,*aa=a->a; 637 PetscReal rs=0.0; 638 FactorShiftCtx sctx; 639 const PetscInt *ddiag; 640 PetscBool row_identity, col_identity; 641 642 PetscFunctionBegin; 643 /* MatPivotSetUp(): initialize shift context sctx */ 644 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 645 646 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 647 ddiag = a->diag; 648 sctx.shift_top = info->zeropivot; 649 for (i=0; i<n; i++) { 650 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 651 d = (aa)[ddiag[i]]; 652 rs = -PetscAbsScalar(d) - PetscRealPart(d); 653 v = aa+ai[i]; 654 nz = ai[i+1] - ai[i]; 655 for (j=0; j<nz; j++) 656 rs += PetscAbsScalar(v[j]); 657 if (rs>sctx.shift_top) sctx.shift_top = rs; 658 } 659 sctx.shift_top *= 1.1; 660 sctx.nshift_max = 5; 661 sctx.shift_lo = 0.; 662 sctx.shift_hi = 1.; 663 } 664 665 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 666 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 667 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 668 ics = ic; 669 670 do { 671 sctx.newshift = PETSC_FALSE; 672 for (i=0; i<n; i++){ 673 nz = bi[i+1] - bi[i]; 674 bjtmp = bj + bi[i]; 675 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 676 677 /* load in initial (unfactored row) */ 678 nz = ai[r[i]+1] - ai[r[i]]; 679 ajtmp = aj + ai[r[i]]; 680 v = aa + ai[r[i]]; 681 for (j=0; j<nz; j++) { 682 rtmp[ics[ajtmp[j]]] = v[j]; 683 } 684 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 685 686 row = *bjtmp++; 687 while (row < i) { 688 pc = rtmp + row; 689 if (*pc != 0.0) { 690 pv = b->a + diag_offset[row]; 691 pj = b->j + diag_offset[row] + 1; 692 multiplier = *pc / *pv++; 693 *pc = multiplier; 694 nz = bi[row+1] - diag_offset[row] - 1; 695 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 696 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 697 } 698 row = *bjtmp++; 699 } 700 /* finished row so stick it into b->a */ 701 pv = b->a + bi[i] ; 702 pj = b->j + bi[i] ; 703 nz = bi[i+1] - bi[i]; 704 diag = diag_offset[i] - bi[i]; 705 rs = 0.0; 706 for (j=0; j<nz; j++) { 707 pv[j] = rtmp[pj[j]]; 708 rs += PetscAbsScalar(pv[j]); 709 } 710 rs -= PetscAbsScalar(pv[diag]); 711 712 sctx.rs = rs; 713 sctx.pv = pv[diag]; 714 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 715 if (sctx.newshift) break; 716 pv[diag] = sctx.pv; 717 } 718 719 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 720 /* 721 * if no shift in this attempt & shifting & started shifting & can refine, 722 * then try lower shift 723 */ 724 sctx.shift_hi = sctx.shift_fraction; 725 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 726 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 727 sctx.newshift = PETSC_TRUE; 728 sctx.nshift++; 729 } 730 } while (sctx.newshift); 731 732 /* invert diagonal entries for simplier triangular solves */ 733 for (i=0; i<n; i++) { 734 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 735 } 736 ierr = PetscFree(rtmp);CHKERRQ(ierr); 737 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 739 740 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 741 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 742 if (row_identity && col_identity) { 743 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 744 } else { 745 C->ops->solve = MatSolve_SeqAIJ_inplace; 746 } 747 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 748 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 749 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 750 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 751 C->assembled = PETSC_TRUE; 752 C->preallocated = PETSC_TRUE; 753 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 754 if (sctx.nshift){ 755 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 756 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 757 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 758 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 759 } 760 } 761 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 762 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 763 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 764 PetscFunctionReturn(0); 765 } 766 767 /* 768 This routine implements inplace ILU(0) with row or/and column permutations. 769 Input: 770 A - original matrix 771 Output; 772 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 773 a->j (col index) is permuted by the inverse of colperm, then sorted 774 a->a reordered accordingly with a->j 775 a->diag (ptr to diagonal elements) is updated. 776 */ 777 #undef __FUNCT__ 778 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 779 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 780 { 781 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 782 IS isrow = a->row,isicol = a->icol; 783 PetscErrorCode ierr; 784 const PetscInt *r,*ic,*ics; 785 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 786 PetscInt *ajtmp,nz,row; 787 PetscInt *diag = a->diag,nbdiag,*pj; 788 PetscScalar *rtmp,*pc,multiplier,d; 789 MatScalar *pv,*v; 790 PetscReal rs; 791 FactorShiftCtx sctx; 792 const MatScalar *aa=a->a,*vtmp; 793 794 PetscFunctionBegin; 795 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 796 797 /* MatPivotSetUp(): initialize shift context sctx */ 798 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 799 800 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 801 const PetscInt *ddiag = a->diag; 802 sctx.shift_top = info->zeropivot; 803 for (i=0; i<n; i++) { 804 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 805 d = (aa)[ddiag[i]]; 806 rs = -PetscAbsScalar(d) - PetscRealPart(d); 807 vtmp = aa+ai[i]; 808 nz = ai[i+1] - ai[i]; 809 for (j=0; j<nz; j++) 810 rs += PetscAbsScalar(vtmp[j]); 811 if (rs>sctx.shift_top) sctx.shift_top = rs; 812 } 813 sctx.shift_top *= 1.1; 814 sctx.nshift_max = 5; 815 sctx.shift_lo = 0.; 816 sctx.shift_hi = 1.; 817 } 818 819 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 820 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 821 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 822 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 823 ics = ic; 824 825 #if defined(MV) 826 sctx.shift_top = 0.; 827 sctx.nshift_max = 0; 828 sctx.shift_lo = 0.; 829 sctx.shift_hi = 0.; 830 sctx.shift_fraction = 0.; 831 832 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 833 sctx.shift_top = 0.; 834 for (i=0; i<n; i++) { 835 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 836 d = (a->a)[diag[i]]; 837 rs = -PetscAbsScalar(d) - PetscRealPart(d); 838 v = a->a+ai[i]; 839 nz = ai[i+1] - ai[i]; 840 for (j=0; j<nz; j++) 841 rs += PetscAbsScalar(v[j]); 842 if (rs>sctx.shift_top) sctx.shift_top = rs; 843 } 844 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 845 sctx.shift_top *= 1.1; 846 sctx.nshift_max = 5; 847 sctx.shift_lo = 0.; 848 sctx.shift_hi = 1.; 849 } 850 851 sctx.shift_amount = 0.; 852 sctx.nshift = 0; 853 #endif 854 855 do { 856 sctx.newshift = PETSC_FALSE; 857 for (i=0; i<n; i++){ 858 /* load in initial unfactored row */ 859 nz = ai[r[i]+1] - ai[r[i]]; 860 ajtmp = aj + ai[r[i]]; 861 v = a->a + ai[r[i]]; 862 /* sort permuted ajtmp and values v accordingly */ 863 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 864 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 865 866 diag[r[i]] = ai[r[i]]; 867 for (j=0; j<nz; j++) { 868 rtmp[ajtmp[j]] = v[j]; 869 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 870 } 871 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 872 873 row = *ajtmp++; 874 while (row < i) { 875 pc = rtmp + row; 876 if (*pc != 0.0) { 877 pv = a->a + diag[r[row]]; 878 pj = aj + diag[r[row]] + 1; 879 880 multiplier = *pc / *pv++; 881 *pc = multiplier; 882 nz = ai[r[row]+1] - diag[r[row]] - 1; 883 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 884 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 885 } 886 row = *ajtmp++; 887 } 888 /* finished row so overwrite it onto a->a */ 889 pv = a->a + ai[r[i]] ; 890 pj = aj + ai[r[i]] ; 891 nz = ai[r[i]+1] - ai[r[i]]; 892 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 893 894 rs = 0.0; 895 for (j=0; j<nz; j++) { 896 pv[j] = rtmp[pj[j]]; 897 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 898 } 899 900 sctx.rs = rs; 901 sctx.pv = pv[nbdiag]; 902 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 903 if (sctx.newshift) break; 904 pv[nbdiag] = sctx.pv; 905 } 906 907 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 908 /* 909 * if no shift in this attempt & shifting & started shifting & can refine, 910 * then try lower shift 911 */ 912 sctx.shift_hi = sctx.shift_fraction; 913 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 914 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 915 sctx.newshift = PETSC_TRUE; 916 sctx.nshift++; 917 } 918 } while (sctx.newshift); 919 920 /* invert diagonal entries for simplier triangular solves */ 921 for (i=0; i<n; i++) { 922 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 923 } 924 925 ierr = PetscFree(rtmp);CHKERRQ(ierr); 926 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 927 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 928 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 929 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 930 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 931 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 932 A->assembled = PETSC_TRUE; 933 A->preallocated = PETSC_TRUE; 934 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 935 if (sctx.nshift){ 936 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 937 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 938 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 939 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 940 } 941 } 942 PetscFunctionReturn(0); 943 } 944 945 /* ----------------------------------------------------------- */ 946 #undef __FUNCT__ 947 #define __FUNCT__ "MatLUFactor_SeqAIJ" 948 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 949 { 950 PetscErrorCode ierr; 951 Mat C; 952 953 PetscFunctionBegin; 954 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 955 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 956 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 957 A->ops->solve = C->ops->solve; 958 A->ops->solvetranspose = C->ops->solvetranspose; 959 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 960 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 961 PetscFunctionReturn(0); 962 } 963 /* ----------------------------------------------------------- */ 964 965 966 #undef __FUNCT__ 967 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 968 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 969 { 970 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 971 IS iscol = a->col,isrow = a->row; 972 PetscErrorCode ierr; 973 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 974 PetscInt nz; 975 const PetscInt *rout,*cout,*r,*c; 976 PetscScalar *x,*tmp,*tmps,sum; 977 const PetscScalar *b; 978 const MatScalar *aa = a->a,*v; 979 980 PetscFunctionBegin; 981 if (!n) PetscFunctionReturn(0); 982 983 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 984 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 985 tmp = a->solve_work; 986 987 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 988 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 989 990 /* forward solve the lower triangular */ 991 tmp[0] = b[*r++]; 992 tmps = tmp; 993 for (i=1; i<n; i++) { 994 v = aa + ai[i] ; 995 vi = aj + ai[i] ; 996 nz = a->diag[i] - ai[i]; 997 sum = b[*r++]; 998 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 999 tmp[i] = sum; 1000 } 1001 1002 /* backward solve the upper triangular */ 1003 for (i=n-1; i>=0; i--){ 1004 v = aa + a->diag[i] + 1; 1005 vi = aj + a->diag[i] + 1; 1006 nz = ai[i+1] - a->diag[i] - 1; 1007 sum = tmp[i]; 1008 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1009 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1010 } 1011 1012 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1013 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1014 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1015 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1016 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1017 PetscFunctionReturn(0); 1018 } 1019 1020 #undef __FUNCT__ 1021 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1022 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1023 { 1024 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1025 IS iscol = a->col,isrow = a->row; 1026 PetscErrorCode ierr; 1027 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1028 PetscInt nz,neq; 1029 const PetscInt *rout,*cout,*r,*c; 1030 PetscScalar *x,*b,*tmp,*tmps,sum; 1031 const MatScalar *aa = a->a,*v; 1032 PetscBool bisdense,xisdense; 1033 1034 PetscFunctionBegin; 1035 if (!n) PetscFunctionReturn(0); 1036 1037 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1038 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1039 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1040 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1041 1042 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1043 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1044 1045 tmp = a->solve_work; 1046 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1047 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1048 1049 for (neq=0; neq<B->cmap->n; neq++){ 1050 /* forward solve the lower triangular */ 1051 tmp[0] = b[r[0]]; 1052 tmps = tmp; 1053 for (i=1; i<n; i++) { 1054 v = aa + ai[i] ; 1055 vi = aj + ai[i] ; 1056 nz = a->diag[i] - ai[i]; 1057 sum = b[r[i]]; 1058 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1059 tmp[i] = sum; 1060 } 1061 /* backward solve the upper triangular */ 1062 for (i=n-1; i>=0; i--){ 1063 v = aa + a->diag[i] + 1; 1064 vi = aj + a->diag[i] + 1; 1065 nz = ai[i+1] - a->diag[i] - 1; 1066 sum = tmp[i]; 1067 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1068 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1069 } 1070 1071 b += n; 1072 x += n; 1073 } 1074 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1075 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1076 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1077 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1078 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1084 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1085 { 1086 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1087 IS iscol = a->col,isrow = a->row; 1088 PetscErrorCode ierr; 1089 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1090 PetscInt nz,neq; 1091 const PetscInt *rout,*cout,*r,*c; 1092 PetscScalar *x,*b,*tmp,sum; 1093 const MatScalar *aa = a->a,*v; 1094 PetscBool bisdense,xisdense; 1095 1096 PetscFunctionBegin; 1097 if (!n) PetscFunctionReturn(0); 1098 1099 ierr = PetscTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1100 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1101 ierr = PetscTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1102 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1103 1104 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1105 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1106 1107 tmp = a->solve_work; 1108 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1109 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1110 1111 for (neq=0; neq<B->cmap->n; neq++){ 1112 /* forward solve the lower triangular */ 1113 tmp[0] = b[r[0]]; 1114 v = aa; 1115 vi = aj; 1116 for (i=1; i<n; i++) { 1117 nz = ai[i+1] - ai[i]; 1118 sum = b[r[i]]; 1119 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1120 tmp[i] = sum; 1121 v += nz; vi += nz; 1122 } 1123 1124 /* backward solve the upper triangular */ 1125 for (i=n-1; i>=0; i--){ 1126 v = aa + adiag[i+1]+1; 1127 vi = aj + adiag[i+1]+1; 1128 nz = adiag[i]-adiag[i+1]-1; 1129 sum = tmp[i]; 1130 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1131 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1132 } 1133 1134 b += n; 1135 x += n; 1136 } 1137 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1138 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1139 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1140 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1141 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 #undef __FUNCT__ 1146 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1147 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1148 { 1149 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1150 IS iscol = a->col,isrow = a->row; 1151 PetscErrorCode ierr; 1152 const PetscInt *r,*c,*rout,*cout; 1153 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1154 PetscInt nz,row; 1155 PetscScalar *x,*b,*tmp,*tmps,sum; 1156 const MatScalar *aa = a->a,*v; 1157 1158 PetscFunctionBegin; 1159 if (!n) PetscFunctionReturn(0); 1160 1161 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1162 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1163 tmp = a->solve_work; 1164 1165 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1166 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1167 1168 /* forward solve the lower triangular */ 1169 tmp[0] = b[*r++]; 1170 tmps = tmp; 1171 for (row=1; row<n; row++) { 1172 i = rout[row]; /* permuted row */ 1173 v = aa + ai[i] ; 1174 vi = aj + ai[i] ; 1175 nz = a->diag[i] - ai[i]; 1176 sum = b[*r++]; 1177 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1178 tmp[row] = sum; 1179 } 1180 1181 /* backward solve the upper triangular */ 1182 for (row=n-1; row>=0; row--){ 1183 i = rout[row]; /* permuted row */ 1184 v = aa + a->diag[i] + 1; 1185 vi = aj + a->diag[i] + 1; 1186 nz = ai[i+1] - a->diag[i] - 1; 1187 sum = tmp[row]; 1188 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1189 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1190 } 1191 1192 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1193 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1194 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1196 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 /* ----------------------------------------------------------- */ 1201 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1202 #undef __FUNCT__ 1203 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1204 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1205 { 1206 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1207 PetscErrorCode ierr; 1208 PetscInt n = A->rmap->n; 1209 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1210 PetscScalar *x; 1211 const PetscScalar *b; 1212 const MatScalar *aa = a->a; 1213 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1214 PetscInt adiag_i,i,nz,ai_i; 1215 const PetscInt *vi; 1216 const MatScalar *v; 1217 PetscScalar sum; 1218 #endif 1219 1220 PetscFunctionBegin; 1221 if (!n) PetscFunctionReturn(0); 1222 1223 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1224 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1225 1226 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1227 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1228 #else 1229 /* forward solve the lower triangular */ 1230 x[0] = b[0]; 1231 for (i=1; i<n; i++) { 1232 ai_i = ai[i]; 1233 v = aa + ai_i; 1234 vi = aj + ai_i; 1235 nz = adiag[i] - ai_i; 1236 sum = b[i]; 1237 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1238 x[i] = sum; 1239 } 1240 1241 /* backward solve the upper triangular */ 1242 for (i=n-1; i>=0; i--){ 1243 adiag_i = adiag[i]; 1244 v = aa + adiag_i + 1; 1245 vi = aj + adiag_i + 1; 1246 nz = ai[i+1] - adiag_i - 1; 1247 sum = x[i]; 1248 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1249 x[i] = sum*aa[adiag_i]; 1250 } 1251 #endif 1252 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1253 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1254 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1255 PetscFunctionReturn(0); 1256 } 1257 1258 #undef __FUNCT__ 1259 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1260 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1261 { 1262 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1263 IS iscol = a->col,isrow = a->row; 1264 PetscErrorCode ierr; 1265 PetscInt i, n = A->rmap->n,j; 1266 PetscInt nz; 1267 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1268 PetscScalar *x,*tmp,sum; 1269 const PetscScalar *b; 1270 const MatScalar *aa = a->a,*v; 1271 1272 PetscFunctionBegin; 1273 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1274 1275 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1276 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1277 tmp = a->solve_work; 1278 1279 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1280 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1281 1282 /* forward solve the lower triangular */ 1283 tmp[0] = b[*r++]; 1284 for (i=1; i<n; i++) { 1285 v = aa + ai[i] ; 1286 vi = aj + ai[i] ; 1287 nz = a->diag[i] - ai[i]; 1288 sum = b[*r++]; 1289 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1290 tmp[i] = sum; 1291 } 1292 1293 /* backward solve the upper triangular */ 1294 for (i=n-1; i>=0; i--){ 1295 v = aa + a->diag[i] + 1; 1296 vi = aj + a->diag[i] + 1; 1297 nz = ai[i+1] - a->diag[i] - 1; 1298 sum = tmp[i]; 1299 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1300 tmp[i] = sum*aa[a->diag[i]]; 1301 x[*c--] += tmp[i]; 1302 } 1303 1304 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1305 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1306 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1307 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1308 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1309 1310 PetscFunctionReturn(0); 1311 } 1312 1313 #undef __FUNCT__ 1314 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1315 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1316 { 1317 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1318 IS iscol = a->col,isrow = a->row; 1319 PetscErrorCode ierr; 1320 PetscInt i, n = A->rmap->n,j; 1321 PetscInt nz; 1322 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1323 PetscScalar *x,*tmp,sum; 1324 const PetscScalar *b; 1325 const MatScalar *aa = a->a,*v; 1326 1327 PetscFunctionBegin; 1328 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1329 1330 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1331 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1332 tmp = a->solve_work; 1333 1334 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1335 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1336 1337 /* forward solve the lower triangular */ 1338 tmp[0] = b[r[0]]; 1339 v = aa; 1340 vi = aj; 1341 for (i=1; i<n; i++) { 1342 nz = ai[i+1] - ai[i]; 1343 sum = b[r[i]]; 1344 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1345 tmp[i] = sum; 1346 v += nz; vi += nz; 1347 } 1348 1349 /* backward solve the upper triangular */ 1350 v = aa + adiag[n-1]; 1351 vi = aj + adiag[n-1]; 1352 for (i=n-1; i>=0; i--){ 1353 nz = adiag[i] - adiag[i+1] - 1; 1354 sum = tmp[i]; 1355 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1356 tmp[i] = sum*v[nz]; 1357 x[c[i]] += tmp[i]; 1358 v += nz+1; vi += nz+1; 1359 } 1360 1361 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1362 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1363 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1364 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1365 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1366 1367 PetscFunctionReturn(0); 1368 } 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1372 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1373 { 1374 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1375 IS iscol = a->col,isrow = a->row; 1376 PetscErrorCode ierr; 1377 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1378 PetscInt i,n = A->rmap->n,j; 1379 PetscInt nz; 1380 PetscScalar *x,*tmp,s1; 1381 const MatScalar *aa = a->a,*v; 1382 const PetscScalar *b; 1383 1384 PetscFunctionBegin; 1385 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1386 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1387 tmp = a->solve_work; 1388 1389 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1390 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1391 1392 /* copy the b into temp work space according to permutation */ 1393 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1394 1395 /* forward solve the U^T */ 1396 for (i=0; i<n; i++) { 1397 v = aa + diag[i] ; 1398 vi = aj + diag[i] + 1; 1399 nz = ai[i+1] - diag[i] - 1; 1400 s1 = tmp[i]; 1401 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1402 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1403 tmp[i] = s1; 1404 } 1405 1406 /* backward solve the L^T */ 1407 for (i=n-1; i>=0; i--){ 1408 v = aa + diag[i] - 1 ; 1409 vi = aj + diag[i] - 1 ; 1410 nz = diag[i] - ai[i]; 1411 s1 = tmp[i]; 1412 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1413 } 1414 1415 /* copy tmp into x according to permutation */ 1416 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1417 1418 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1419 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1420 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1421 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1422 1423 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1424 PetscFunctionReturn(0); 1425 } 1426 1427 #undef __FUNCT__ 1428 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1429 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1430 { 1431 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1432 IS iscol = a->col,isrow = a->row; 1433 PetscErrorCode ierr; 1434 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1435 PetscInt i,n = A->rmap->n,j; 1436 PetscInt nz; 1437 PetscScalar *x,*tmp,s1; 1438 const MatScalar *aa = a->a,*v; 1439 const PetscScalar *b; 1440 1441 PetscFunctionBegin; 1442 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1443 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1444 tmp = a->solve_work; 1445 1446 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1447 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1448 1449 /* copy the b into temp work space according to permutation */ 1450 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1451 1452 /* forward solve the U^T */ 1453 for (i=0; i<n; i++) { 1454 v = aa + adiag[i+1] + 1; 1455 vi = aj + adiag[i+1] + 1; 1456 nz = adiag[i] - adiag[i+1] - 1; 1457 s1 = tmp[i]; 1458 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1459 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1460 tmp[i] = s1; 1461 } 1462 1463 /* backward solve the L^T */ 1464 for (i=n-1; i>=0; i--){ 1465 v = aa + ai[i]; 1466 vi = aj + ai[i]; 1467 nz = ai[i+1] - ai[i]; 1468 s1 = tmp[i]; 1469 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1470 } 1471 1472 /* copy tmp into x according to permutation */ 1473 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1474 1475 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1476 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1477 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1478 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1479 1480 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1481 PetscFunctionReturn(0); 1482 } 1483 1484 #undef __FUNCT__ 1485 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1486 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1487 { 1488 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1489 IS iscol = a->col,isrow = a->row; 1490 PetscErrorCode ierr; 1491 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1492 PetscInt i,n = A->rmap->n,j; 1493 PetscInt nz; 1494 PetscScalar *x,*tmp,s1; 1495 const MatScalar *aa = a->a,*v; 1496 const PetscScalar *b; 1497 1498 PetscFunctionBegin; 1499 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1500 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1501 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1502 tmp = a->solve_work; 1503 1504 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1505 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1506 1507 /* copy the b into temp work space according to permutation */ 1508 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1509 1510 /* forward solve the U^T */ 1511 for (i=0; i<n; i++) { 1512 v = aa + diag[i] ; 1513 vi = aj + diag[i] + 1; 1514 nz = ai[i+1] - diag[i] - 1; 1515 s1 = tmp[i]; 1516 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1517 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1518 tmp[i] = s1; 1519 } 1520 1521 /* backward solve the L^T */ 1522 for (i=n-1; i>=0; i--){ 1523 v = aa + diag[i] - 1 ; 1524 vi = aj + diag[i] - 1 ; 1525 nz = diag[i] - ai[i]; 1526 s1 = tmp[i]; 1527 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1528 } 1529 1530 /* copy tmp into x according to permutation */ 1531 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1532 1533 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1534 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1535 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1536 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1537 1538 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1539 PetscFunctionReturn(0); 1540 } 1541 1542 #undef __FUNCT__ 1543 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1544 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1545 { 1546 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1547 IS iscol = a->col,isrow = a->row; 1548 PetscErrorCode ierr; 1549 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1550 PetscInt i,n = A->rmap->n,j; 1551 PetscInt nz; 1552 PetscScalar *x,*tmp,s1; 1553 const MatScalar *aa = a->a,*v; 1554 const PetscScalar *b; 1555 1556 PetscFunctionBegin; 1557 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1558 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1559 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1560 tmp = a->solve_work; 1561 1562 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1563 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1564 1565 /* copy the b into temp work space according to permutation */ 1566 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1567 1568 /* forward solve the U^T */ 1569 for (i=0; i<n; i++) { 1570 v = aa + adiag[i+1] + 1; 1571 vi = aj + adiag[i+1] + 1; 1572 nz = adiag[i] - adiag[i+1] - 1; 1573 s1 = tmp[i]; 1574 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1575 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1576 tmp[i] = s1; 1577 } 1578 1579 1580 /* backward solve the L^T */ 1581 for (i=n-1; i>=0; i--){ 1582 v = aa + ai[i] ; 1583 vi = aj + ai[i]; 1584 nz = ai[i+1] - ai[i]; 1585 s1 = tmp[i]; 1586 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1587 } 1588 1589 /* copy tmp into x according to permutation */ 1590 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1591 1592 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1593 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1594 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1595 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1596 1597 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 /* ----------------------------------------------------------------*/ 1602 1603 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1604 1605 /* 1606 ilu() under revised new data structure. 1607 Factored arrays bj and ba are stored as 1608 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1609 1610 bi=fact->i is an array of size n+1, in which 1611 bi+ 1612 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1613 bi[n]: points to L(n-1,n-1)+1 1614 1615 bdiag=fact->diag is an array of size n+1,in which 1616 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1617 bdiag[n]: points to entry of U(n-1,0)-1 1618 1619 U(i,:) contains bdiag[i] as its last entry, i.e., 1620 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1621 */ 1622 #undef __FUNCT__ 1623 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1624 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1625 { 1626 1627 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1628 PetscErrorCode ierr; 1629 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1630 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1631 PetscBool missing; 1632 IS isicol; 1633 1634 PetscFunctionBegin; 1635 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1636 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1637 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1638 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1639 1640 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1641 b = (Mat_SeqAIJ*)(fact)->data; 1642 1643 /* allocate matrix arrays for new data structure */ 1644 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1645 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1646 b->singlemalloc = PETSC_TRUE; 1647 if (!b->diag){ 1648 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1649 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1650 } 1651 bdiag = b->diag; 1652 1653 if (n > 0) { 1654 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1655 } 1656 1657 /* set bi and bj with new data structure */ 1658 bi = b->i; 1659 bj = b->j; 1660 1661 /* L part */ 1662 bi[0] = 0; 1663 for (i=0; i<n; i++){ 1664 nz = adiag[i] - ai[i]; 1665 bi[i+1] = bi[i] + nz; 1666 aj = a->j + ai[i]; 1667 for (j=0; j<nz; j++){ 1668 /* *bj = aj[j]; bj++; */ 1669 bj[k++] = aj[j]; 1670 } 1671 } 1672 1673 /* U part */ 1674 bdiag[n] = bi[n]-1; 1675 for (i=n-1; i>=0; i--){ 1676 nz = ai[i+1] - adiag[i] - 1; 1677 aj = a->j + adiag[i] + 1; 1678 for (j=0; j<nz; j++){ 1679 /* *bj = aj[j]; bj++; */ 1680 bj[k++] = aj[j]; 1681 } 1682 /* diag[i] */ 1683 /* *bj = i; bj++; */ 1684 bj[k++] = i; 1685 bdiag[i] = bdiag[i+1] + nz + 1; 1686 } 1687 1688 fact->factortype = MAT_FACTOR_ILU; 1689 fact->info.factor_mallocs = 0; 1690 fact->info.fill_ratio_given = info->fill; 1691 fact->info.fill_ratio_needed = 1.0; 1692 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1693 1694 b = (Mat_SeqAIJ*)(fact)->data; 1695 b->row = isrow; 1696 b->col = iscol; 1697 b->icol = isicol; 1698 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1699 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1700 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1706 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1707 { 1708 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1709 IS isicol; 1710 PetscErrorCode ierr; 1711 const PetscInt *r,*ic; 1712 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1713 PetscInt *bi,*cols,nnz,*cols_lvl; 1714 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1715 PetscInt i,levels,diagonal_fill; 1716 PetscBool col_identity,row_identity; 1717 PetscReal f; 1718 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1719 PetscBT lnkbt; 1720 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1721 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1722 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1723 1724 PetscFunctionBegin; 1725 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1726 /* 1727 PetscBool olddatastruct=PETSC_FALSE; 1728 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1729 if(olddatastruct){ 1730 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1731 PetscFunctionReturn(0); 1732 } 1733 */ 1734 1735 levels = (PetscInt)info->levels; 1736 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1737 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1738 1739 if (!levels && row_identity && col_identity) { 1740 /* special case: ilu(0) with natural ordering */ 1741 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1742 if (a->inode.size) { 1743 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1744 } 1745 PetscFunctionReturn(0); 1746 } 1747 1748 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1749 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1750 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1751 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1752 1753 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1754 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1755 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1756 bi[0] = bdiag[0] = 0; 1757 1758 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1759 1760 /* create a linked list for storing column indices of the active row */ 1761 nlnk = n + 1; 1762 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1763 1764 /* initial FreeSpace size is f*(ai[n]+1) */ 1765 f = info->fill; 1766 diagonal_fill = (PetscInt)info->diagonal_fill; 1767 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1768 current_space = free_space; 1769 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1770 current_space_lvl = free_space_lvl; 1771 1772 for (i=0; i<n; i++) { 1773 nzi = 0; 1774 /* copy current row into linked list */ 1775 nnz = ai[r[i]+1] - ai[r[i]]; 1776 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1777 cols = aj + ai[r[i]]; 1778 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1779 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1780 nzi += nlnk; 1781 1782 /* make sure diagonal entry is included */ 1783 if (diagonal_fill && lnk[i] == -1) { 1784 fm = n; 1785 while (lnk[fm] < i) fm = lnk[fm]; 1786 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1787 lnk[fm] = i; 1788 lnk_lvl[i] = 0; 1789 nzi++; dcount++; 1790 } 1791 1792 /* add pivot rows into the active row */ 1793 nzbd = 0; 1794 prow = lnk[n]; 1795 while (prow < i) { 1796 nnz = bdiag[prow]; 1797 cols = bj_ptr[prow] + nnz + 1; 1798 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1799 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1800 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1801 nzi += nlnk; 1802 prow = lnk[prow]; 1803 nzbd++; 1804 } 1805 bdiag[i] = nzbd; 1806 bi[i+1] = bi[i] + nzi; 1807 1808 /* if free space is not available, make more free space */ 1809 if (current_space->local_remaining<nzi) { 1810 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1811 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1812 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1813 reallocs++; 1814 } 1815 1816 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1817 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1818 bj_ptr[i] = current_space->array; 1819 bjlvl_ptr[i] = current_space_lvl->array; 1820 1821 /* make sure the active row i has diagonal entry */ 1822 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1823 1824 current_space->array += nzi; 1825 current_space->local_used += nzi; 1826 current_space->local_remaining -= nzi; 1827 current_space_lvl->array += nzi; 1828 current_space_lvl->local_used += nzi; 1829 current_space_lvl->local_remaining -= nzi; 1830 } 1831 1832 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1833 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1834 1835 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1836 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1837 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1838 1839 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1840 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1841 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1842 1843 #if defined(PETSC_USE_INFO) 1844 { 1845 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1846 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1847 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1848 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1849 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1850 if (diagonal_fill) { 1851 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1852 } 1853 } 1854 #endif 1855 1856 /* put together the new matrix */ 1857 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1858 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1859 b = (Mat_SeqAIJ*)(fact)->data; 1860 b->free_a = PETSC_TRUE; 1861 b->free_ij = PETSC_TRUE; 1862 b->singlemalloc = PETSC_FALSE; 1863 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1864 b->j = bj; 1865 b->i = bi; 1866 b->diag = bdiag; 1867 b->ilen = 0; 1868 b->imax = 0; 1869 b->row = isrow; 1870 b->col = iscol; 1871 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1872 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1873 b->icol = isicol; 1874 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1875 /* In b structure: Free imax, ilen, old a, old j. 1876 Allocate bdiag, solve_work, new a, new j */ 1877 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1878 b->maxnz = b->nz = bdiag[0]+1; 1879 (fact)->info.factor_mallocs = reallocs; 1880 (fact)->info.fill_ratio_given = f; 1881 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1882 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1883 if (a->inode.size) { 1884 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1885 } 1886 PetscFunctionReturn(0); 1887 } 1888 1889 #undef __FUNCT__ 1890 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1891 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1892 { 1893 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1894 IS isicol; 1895 PetscErrorCode ierr; 1896 const PetscInt *r,*ic; 1897 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1898 PetscInt *bi,*cols,nnz,*cols_lvl; 1899 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1900 PetscInt i,levels,diagonal_fill; 1901 PetscBool col_identity,row_identity; 1902 PetscReal f; 1903 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1904 PetscBT lnkbt; 1905 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1906 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1907 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1908 PetscBool missing; 1909 1910 PetscFunctionBegin; 1911 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1912 f = info->fill; 1913 levels = (PetscInt)info->levels; 1914 diagonal_fill = (PetscInt)info->diagonal_fill; 1915 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1916 1917 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1918 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1919 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1920 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1921 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1922 if (a->inode.size) { 1923 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1924 } 1925 fact->factortype = MAT_FACTOR_ILU; 1926 (fact)->info.factor_mallocs = 0; 1927 (fact)->info.fill_ratio_given = info->fill; 1928 (fact)->info.fill_ratio_needed = 1.0; 1929 b = (Mat_SeqAIJ*)(fact)->data; 1930 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1931 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1932 b->row = isrow; 1933 b->col = iscol; 1934 b->icol = isicol; 1935 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1936 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1937 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1938 PetscFunctionReturn(0); 1939 } 1940 1941 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1942 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1943 1944 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1945 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1946 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1947 bi[0] = bdiag[0] = 0; 1948 1949 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1950 1951 /* create a linked list for storing column indices of the active row */ 1952 nlnk = n + 1; 1953 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1954 1955 /* initial FreeSpace size is f*(ai[n]+1) */ 1956 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1957 current_space = free_space; 1958 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1959 current_space_lvl = free_space_lvl; 1960 1961 for (i=0; i<n; i++) { 1962 nzi = 0; 1963 /* copy current row into linked list */ 1964 nnz = ai[r[i]+1] - ai[r[i]]; 1965 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1966 cols = aj + ai[r[i]]; 1967 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1968 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1969 nzi += nlnk; 1970 1971 /* make sure diagonal entry is included */ 1972 if (diagonal_fill && lnk[i] == -1) { 1973 fm = n; 1974 while (lnk[fm] < i) fm = lnk[fm]; 1975 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1976 lnk[fm] = i; 1977 lnk_lvl[i] = 0; 1978 nzi++; dcount++; 1979 } 1980 1981 /* add pivot rows into the active row */ 1982 nzbd = 0; 1983 prow = lnk[n]; 1984 while (prow < i) { 1985 nnz = bdiag[prow]; 1986 cols = bj_ptr[prow] + nnz + 1; 1987 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1988 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1989 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1990 nzi += nlnk; 1991 prow = lnk[prow]; 1992 nzbd++; 1993 } 1994 bdiag[i] = nzbd; 1995 bi[i+1] = bi[i] + nzi; 1996 1997 /* if free space is not available, make more free space */ 1998 if (current_space->local_remaining<nzi) { 1999 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2000 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2001 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2002 reallocs++; 2003 } 2004 2005 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2006 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2007 bj_ptr[i] = current_space->array; 2008 bjlvl_ptr[i] = current_space_lvl->array; 2009 2010 /* make sure the active row i has diagonal entry */ 2011 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2012 2013 current_space->array += nzi; 2014 current_space->local_used += nzi; 2015 current_space->local_remaining -= nzi; 2016 current_space_lvl->array += nzi; 2017 current_space_lvl->local_used += nzi; 2018 current_space_lvl->local_remaining -= nzi; 2019 } 2020 2021 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2022 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2023 2024 /* destroy list of free space and other temporary arrays */ 2025 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2026 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2027 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2028 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2029 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2030 2031 #if defined(PETSC_USE_INFO) 2032 { 2033 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2034 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2035 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2036 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2037 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2038 if (diagonal_fill) { 2039 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2040 } 2041 } 2042 #endif 2043 2044 /* put together the new matrix */ 2045 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2046 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2047 b = (Mat_SeqAIJ*)(fact)->data; 2048 b->free_a = PETSC_TRUE; 2049 b->free_ij = PETSC_TRUE; 2050 b->singlemalloc = PETSC_FALSE; 2051 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2052 b->j = bj; 2053 b->i = bi; 2054 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2055 b->diag = bdiag; 2056 b->ilen = 0; 2057 b->imax = 0; 2058 b->row = isrow; 2059 b->col = iscol; 2060 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2061 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2062 b->icol = isicol; 2063 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2064 /* In b structure: Free imax, ilen, old a, old j. 2065 Allocate bdiag, solve_work, new a, new j */ 2066 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2067 b->maxnz = b->nz = bi[n] ; 2068 (fact)->info.factor_mallocs = reallocs; 2069 (fact)->info.fill_ratio_given = f; 2070 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2071 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2072 if (a->inode.size) { 2073 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2074 } 2075 PetscFunctionReturn(0); 2076 } 2077 2078 #undef __FUNCT__ 2079 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2080 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2081 { 2082 Mat C = B; 2083 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2084 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2085 IS ip=b->row,iip = b->icol; 2086 PetscErrorCode ierr; 2087 const PetscInt *rip,*riip; 2088 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2089 PetscInt *ai=a->i,*aj=a->j; 2090 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2091 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2092 PetscBool perm_identity; 2093 FactorShiftCtx sctx; 2094 PetscReal rs; 2095 MatScalar d,*v; 2096 2097 PetscFunctionBegin; 2098 /* MatPivotSetUp(): initialize shift context sctx */ 2099 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2100 2101 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2102 sctx.shift_top = info->zeropivot; 2103 for (i=0; i<mbs; i++) { 2104 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2105 d = (aa)[a->diag[i]]; 2106 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2107 v = aa+ai[i]; 2108 nz = ai[i+1] - ai[i]; 2109 for (j=0; j<nz; j++) 2110 rs += PetscAbsScalar(v[j]); 2111 if (rs>sctx.shift_top) sctx.shift_top = rs; 2112 } 2113 sctx.shift_top *= 1.1; 2114 sctx.nshift_max = 5; 2115 sctx.shift_lo = 0.; 2116 sctx.shift_hi = 1.; 2117 } 2118 2119 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2120 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2121 2122 /* allocate working arrays 2123 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2124 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2125 */ 2126 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2127 2128 do { 2129 sctx.newshift = PETSC_FALSE; 2130 2131 for (i=0; i<mbs; i++) c2r[i] = mbs; 2132 il[0] = 0; 2133 2134 for (k = 0; k<mbs; k++){ 2135 /* zero rtmp */ 2136 nz = bi[k+1] - bi[k]; 2137 bjtmp = bj + bi[k]; 2138 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2139 2140 /* load in initial unfactored row */ 2141 bval = ba + bi[k]; 2142 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2143 for (j = jmin; j < jmax; j++){ 2144 col = riip[aj[j]]; 2145 if (col >= k){ /* only take upper triangular entry */ 2146 rtmp[col] = aa[j]; 2147 *bval++ = 0.0; /* for in-place factorization */ 2148 } 2149 } 2150 /* shift the diagonal of the matrix: ZeropivotApply() */ 2151 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2152 2153 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2154 dk = rtmp[k]; 2155 i = c2r[k]; /* first row to be added to k_th row */ 2156 2157 while (i < k){ 2158 nexti = c2r[i]; /* next row to be added to k_th row */ 2159 2160 /* compute multiplier, update diag(k) and U(i,k) */ 2161 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2162 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2163 dk += uikdi*ba[ili]; /* update diag[k] */ 2164 ba[ili] = uikdi; /* -U(i,k) */ 2165 2166 /* add multiple of row i to k-th row */ 2167 jmin = ili + 1; jmax = bi[i+1]; 2168 if (jmin < jmax){ 2169 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2170 /* update il and c2r for row i */ 2171 il[i] = jmin; 2172 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2173 } 2174 i = nexti; 2175 } 2176 2177 /* copy data into U(k,:) */ 2178 rs = 0.0; 2179 jmin = bi[k]; jmax = bi[k+1]-1; 2180 if (jmin < jmax) { 2181 for (j=jmin; j<jmax; j++){ 2182 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2183 } 2184 /* add the k-th row into il and c2r */ 2185 il[k] = jmin; 2186 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2187 } 2188 2189 /* MatPivotCheck() */ 2190 sctx.rs = rs; 2191 sctx.pv = dk; 2192 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2193 if(sctx.newshift) break; 2194 dk = sctx.pv; 2195 2196 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2197 } 2198 } while (sctx.newshift); 2199 2200 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2201 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2202 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2203 2204 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2205 if (perm_identity){ 2206 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2207 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2208 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2209 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2210 } else { 2211 B->ops->solve = MatSolve_SeqSBAIJ_1; 2212 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2213 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2214 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2215 } 2216 2217 C->assembled = PETSC_TRUE; 2218 C->preallocated = PETSC_TRUE; 2219 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2220 2221 /* MatPivotView() */ 2222 if (sctx.nshift){ 2223 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2224 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2225 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2226 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2227 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2228 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2229 } 2230 } 2231 PetscFunctionReturn(0); 2232 } 2233 2234 #undef __FUNCT__ 2235 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2236 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2237 { 2238 Mat C = B; 2239 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2240 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2241 IS ip=b->row,iip = b->icol; 2242 PetscErrorCode ierr; 2243 const PetscInt *rip,*riip; 2244 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2245 PetscInt *ai=a->i,*aj=a->j; 2246 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2247 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2248 PetscBool perm_identity; 2249 FactorShiftCtx sctx; 2250 PetscReal rs; 2251 MatScalar d,*v; 2252 2253 PetscFunctionBegin; 2254 /* MatPivotSetUp(): initialize shift context sctx */ 2255 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2256 2257 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2258 sctx.shift_top = info->zeropivot; 2259 for (i=0; i<mbs; i++) { 2260 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2261 d = (aa)[a->diag[i]]; 2262 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2263 v = aa+ai[i]; 2264 nz = ai[i+1] - ai[i]; 2265 for (j=0; j<nz; j++) 2266 rs += PetscAbsScalar(v[j]); 2267 if (rs>sctx.shift_top) sctx.shift_top = rs; 2268 } 2269 sctx.shift_top *= 1.1; 2270 sctx.nshift_max = 5; 2271 sctx.shift_lo = 0.; 2272 sctx.shift_hi = 1.; 2273 } 2274 2275 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2276 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2277 2278 /* initialization */ 2279 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2280 2281 do { 2282 sctx.newshift = PETSC_FALSE; 2283 2284 for (i=0; i<mbs; i++) jl[i] = mbs; 2285 il[0] = 0; 2286 2287 for (k = 0; k<mbs; k++){ 2288 /* zero rtmp */ 2289 nz = bi[k+1] - bi[k]; 2290 bjtmp = bj + bi[k]; 2291 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2292 2293 bval = ba + bi[k]; 2294 /* initialize k-th row by the perm[k]-th row of A */ 2295 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2296 for (j = jmin; j < jmax; j++){ 2297 col = riip[aj[j]]; 2298 if (col >= k){ /* only take upper triangular entry */ 2299 rtmp[col] = aa[j]; 2300 *bval++ = 0.0; /* for in-place factorization */ 2301 } 2302 } 2303 /* shift the diagonal of the matrix */ 2304 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2305 2306 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2307 dk = rtmp[k]; 2308 i = jl[k]; /* first row to be added to k_th row */ 2309 2310 while (i < k){ 2311 nexti = jl[i]; /* next row to be added to k_th row */ 2312 2313 /* compute multiplier, update diag(k) and U(i,k) */ 2314 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2315 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2316 dk += uikdi*ba[ili]; 2317 ba[ili] = uikdi; /* -U(i,k) */ 2318 2319 /* add multiple of row i to k-th row */ 2320 jmin = ili + 1; jmax = bi[i+1]; 2321 if (jmin < jmax){ 2322 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2323 /* update il and jl for row i */ 2324 il[i] = jmin; 2325 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2326 } 2327 i = nexti; 2328 } 2329 2330 /* shift the diagonals when zero pivot is detected */ 2331 /* compute rs=sum of abs(off-diagonal) */ 2332 rs = 0.0; 2333 jmin = bi[k]+1; 2334 nz = bi[k+1] - jmin; 2335 bcol = bj + jmin; 2336 for (j=0; j<nz; j++) { 2337 rs += PetscAbsScalar(rtmp[bcol[j]]); 2338 } 2339 2340 sctx.rs = rs; 2341 sctx.pv = dk; 2342 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2343 if (sctx.newshift) break; 2344 dk = sctx.pv; 2345 2346 /* copy data into U(k,:) */ 2347 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2348 jmin = bi[k]+1; jmax = bi[k+1]; 2349 if (jmin < jmax) { 2350 for (j=jmin; j<jmax; j++){ 2351 col = bj[j]; ba[j] = rtmp[col]; 2352 } 2353 /* add the k-th row into il and jl */ 2354 il[k] = jmin; 2355 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2356 } 2357 } 2358 } while (sctx.newshift); 2359 2360 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2361 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2362 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2363 2364 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2365 if (perm_identity){ 2366 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2367 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2370 } else { 2371 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2372 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2373 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2374 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2375 } 2376 2377 C->assembled = PETSC_TRUE; 2378 C->preallocated = PETSC_TRUE; 2379 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2380 if (sctx.nshift){ 2381 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2382 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2383 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2384 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2385 } 2386 } 2387 PetscFunctionReturn(0); 2388 } 2389 2390 /* 2391 icc() under revised new data structure. 2392 Factored arrays bj and ba are stored as 2393 U(0,:),...,U(i,:),U(n-1,:) 2394 2395 ui=fact->i is an array of size n+1, in which 2396 ui+ 2397 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2398 ui[n]: points to U(n-1,n-1)+1 2399 2400 udiag=fact->diag is an array of size n,in which 2401 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2402 2403 U(i,:) contains udiag[i] as its last entry, i.e., 2404 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2405 */ 2406 2407 #undef __FUNCT__ 2408 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2409 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2410 { 2411 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2412 Mat_SeqSBAIJ *b; 2413 PetscErrorCode ierr; 2414 PetscBool perm_identity,missing; 2415 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2416 const PetscInt *rip,*riip; 2417 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2418 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2419 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2420 PetscReal fill=info->fill,levels=info->levels; 2421 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2422 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2423 PetscBT lnkbt; 2424 IS iperm; 2425 2426 PetscFunctionBegin; 2427 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2428 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2429 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2430 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2431 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2432 2433 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2434 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2435 ui[0] = 0; 2436 2437 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2438 if (!levels && perm_identity) { 2439 for (i=0; i<am; i++) { 2440 ncols = ai[i+1] - a->diag[i]; 2441 ui[i+1] = ui[i] + ncols; 2442 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2443 } 2444 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2445 cols = uj; 2446 for (i=0; i<am; i++) { 2447 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2448 ncols = ai[i+1] - a->diag[i] -1; 2449 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2450 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2451 } 2452 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2453 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2454 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2455 2456 /* initialization */ 2457 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2458 2459 /* jl: linked list for storing indices of the pivot rows 2460 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2461 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2462 for (i=0; i<am; i++){ 2463 jl[i] = am; il[i] = 0; 2464 } 2465 2466 /* create and initialize a linked list for storing column indices of the active row k */ 2467 nlnk = am + 1; 2468 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2469 2470 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2471 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2472 current_space = free_space; 2473 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2474 current_space_lvl = free_space_lvl; 2475 2476 for (k=0; k<am; k++){ /* for each active row k */ 2477 /* initialize lnk by the column indices of row rip[k] of A */ 2478 nzk = 0; 2479 ncols = ai[rip[k]+1] - ai[rip[k]]; 2480 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2481 ncols_upper = 0; 2482 for (j=0; j<ncols; j++){ 2483 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2484 if (riip[i] >= k){ /* only take upper triangular entry */ 2485 ajtmp[ncols_upper] = i; 2486 ncols_upper++; 2487 } 2488 } 2489 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2490 nzk += nlnk; 2491 2492 /* update lnk by computing fill-in for each pivot row to be merged in */ 2493 prow = jl[k]; /* 1st pivot row */ 2494 2495 while (prow < k){ 2496 nextprow = jl[prow]; 2497 2498 /* merge prow into k-th row */ 2499 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2500 jmax = ui[prow+1]; 2501 ncols = jmax-jmin; 2502 i = jmin - ui[prow]; 2503 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2504 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2505 j = *(uj - 1); 2506 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2507 nzk += nlnk; 2508 2509 /* update il and jl for prow */ 2510 if (jmin < jmax){ 2511 il[prow] = jmin; 2512 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2513 } 2514 prow = nextprow; 2515 } 2516 2517 /* if free space is not available, make more free space */ 2518 if (current_space->local_remaining<nzk) { 2519 i = am - k + 1; /* num of unfactored rows */ 2520 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2521 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2522 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2523 reallocs++; 2524 } 2525 2526 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2527 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2528 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2529 2530 /* add the k-th row into il and jl */ 2531 if (nzk > 1){ 2532 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2533 jl[k] = jl[i]; jl[i] = k; 2534 il[k] = ui[k] + 1; 2535 } 2536 uj_ptr[k] = current_space->array; 2537 uj_lvl_ptr[k] = current_space_lvl->array; 2538 2539 current_space->array += nzk; 2540 current_space->local_used += nzk; 2541 current_space->local_remaining -= nzk; 2542 2543 current_space_lvl->array += nzk; 2544 current_space_lvl->local_used += nzk; 2545 current_space_lvl->local_remaining -= nzk; 2546 2547 ui[k+1] = ui[k] + nzk; 2548 } 2549 2550 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2551 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2552 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2553 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2554 2555 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2556 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2557 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2558 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2559 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2560 2561 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2562 2563 /* put together the new matrix in MATSEQSBAIJ format */ 2564 b = (Mat_SeqSBAIJ*)(fact)->data; 2565 b->singlemalloc = PETSC_FALSE; 2566 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2567 b->j = uj; 2568 b->i = ui; 2569 b->diag = udiag; 2570 b->free_diag = PETSC_TRUE; 2571 b->ilen = 0; 2572 b->imax = 0; 2573 b->row = perm; 2574 b->col = perm; 2575 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2576 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2577 b->icol = iperm; 2578 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2579 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2580 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2581 b->maxnz = b->nz = ui[am]; 2582 b->free_a = PETSC_TRUE; 2583 b->free_ij = PETSC_TRUE; 2584 2585 fact->info.factor_mallocs = reallocs; 2586 fact->info.fill_ratio_given = fill; 2587 if (ai[am] != 0) { 2588 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2589 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2590 } else { 2591 fact->info.fill_ratio_needed = 0.0; 2592 } 2593 #if defined(PETSC_USE_INFO) 2594 if (ai[am] != 0) { 2595 PetscReal af = fact->info.fill_ratio_needed; 2596 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2597 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2598 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2599 } else { 2600 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2601 } 2602 #endif 2603 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2604 PetscFunctionReturn(0); 2605 } 2606 2607 #undef __FUNCT__ 2608 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2609 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2610 { 2611 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2612 Mat_SeqSBAIJ *b; 2613 PetscErrorCode ierr; 2614 PetscBool perm_identity,missing; 2615 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2616 const PetscInt *rip,*riip; 2617 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2618 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2619 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2620 PetscReal fill=info->fill,levels=info->levels; 2621 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2622 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2623 PetscBT lnkbt; 2624 IS iperm; 2625 2626 PetscFunctionBegin; 2627 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2628 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2629 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2630 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2631 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2632 2633 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2634 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2635 ui[0] = 0; 2636 2637 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2638 if (!levels && perm_identity) { 2639 2640 for (i=0; i<am; i++) { 2641 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2642 udiag[i] = ui[i]; 2643 } 2644 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2645 cols = uj; 2646 for (i=0; i<am; i++) { 2647 aj = a->j + a->diag[i]; 2648 ncols = ui[i+1] - ui[i]; 2649 for (j=0; j<ncols; j++) *cols++ = *aj++; 2650 } 2651 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2652 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2653 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2654 2655 /* initialization */ 2656 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2657 2658 /* jl: linked list for storing indices of the pivot rows 2659 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2660 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2661 for (i=0; i<am; i++){ 2662 jl[i] = am; il[i] = 0; 2663 } 2664 2665 /* create and initialize a linked list for storing column indices of the active row k */ 2666 nlnk = am + 1; 2667 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2668 2669 /* initial FreeSpace size is fill*(ai[am]+1) */ 2670 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2671 current_space = free_space; 2672 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2673 current_space_lvl = free_space_lvl; 2674 2675 for (k=0; k<am; k++){ /* for each active row k */ 2676 /* initialize lnk by the column indices of row rip[k] of A */ 2677 nzk = 0; 2678 ncols = ai[rip[k]+1] - ai[rip[k]]; 2679 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2680 ncols_upper = 0; 2681 for (j=0; j<ncols; j++){ 2682 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2683 if (riip[i] >= k){ /* only take upper triangular entry */ 2684 ajtmp[ncols_upper] = i; 2685 ncols_upper++; 2686 } 2687 } 2688 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2689 nzk += nlnk; 2690 2691 /* update lnk by computing fill-in for each pivot row to be merged in */ 2692 prow = jl[k]; /* 1st pivot row */ 2693 2694 while (prow < k){ 2695 nextprow = jl[prow]; 2696 2697 /* merge prow into k-th row */ 2698 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2699 jmax = ui[prow+1]; 2700 ncols = jmax-jmin; 2701 i = jmin - ui[prow]; 2702 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2703 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2704 j = *(uj - 1); 2705 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2706 nzk += nlnk; 2707 2708 /* update il and jl for prow */ 2709 if (jmin < jmax){ 2710 il[prow] = jmin; 2711 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2712 } 2713 prow = nextprow; 2714 } 2715 2716 /* if free space is not available, make more free space */ 2717 if (current_space->local_remaining<nzk) { 2718 i = am - k + 1; /* num of unfactored rows */ 2719 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2720 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2721 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2722 reallocs++; 2723 } 2724 2725 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2726 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2727 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2728 2729 /* add the k-th row into il and jl */ 2730 if (nzk > 1){ 2731 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2732 jl[k] = jl[i]; jl[i] = k; 2733 il[k] = ui[k] + 1; 2734 } 2735 uj_ptr[k] = current_space->array; 2736 uj_lvl_ptr[k] = current_space_lvl->array; 2737 2738 current_space->array += nzk; 2739 current_space->local_used += nzk; 2740 current_space->local_remaining -= nzk; 2741 2742 current_space_lvl->array += nzk; 2743 current_space_lvl->local_used += nzk; 2744 current_space_lvl->local_remaining -= nzk; 2745 2746 ui[k+1] = ui[k] + nzk; 2747 } 2748 2749 #if defined(PETSC_USE_INFO) 2750 if (ai[am] != 0) { 2751 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2752 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2753 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2754 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2755 } else { 2756 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2757 } 2758 #endif 2759 2760 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2761 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2762 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2763 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2764 2765 /* destroy list of free space and other temporary array(s) */ 2766 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2767 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2768 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2769 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2770 2771 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2772 2773 /* put together the new matrix in MATSEQSBAIJ format */ 2774 2775 b = (Mat_SeqSBAIJ*)fact->data; 2776 b->singlemalloc = PETSC_FALSE; 2777 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2778 b->j = uj; 2779 b->i = ui; 2780 b->diag = udiag; 2781 b->free_diag = PETSC_TRUE; 2782 b->ilen = 0; 2783 b->imax = 0; 2784 b->row = perm; 2785 b->col = perm; 2786 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2787 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2788 b->icol = iperm; 2789 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2790 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2791 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2792 b->maxnz = b->nz = ui[am]; 2793 b->free_a = PETSC_TRUE; 2794 b->free_ij = PETSC_TRUE; 2795 2796 fact->info.factor_mallocs = reallocs; 2797 fact->info.fill_ratio_given = fill; 2798 if (ai[am] != 0) { 2799 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2800 } else { 2801 fact->info.fill_ratio_needed = 0.0; 2802 } 2803 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2804 PetscFunctionReturn(0); 2805 } 2806 2807 #undef __FUNCT__ 2808 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2809 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2810 { 2811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2812 Mat_SeqSBAIJ *b; 2813 PetscErrorCode ierr; 2814 PetscBool perm_identity; 2815 PetscReal fill = info->fill; 2816 const PetscInt *rip,*riip; 2817 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2818 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2819 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2820 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2821 PetscBT lnkbt; 2822 IS iperm; 2823 2824 PetscFunctionBegin; 2825 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2826 /* check whether perm is the identity mapping */ 2827 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2828 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2829 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2830 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2831 2832 /* initialization */ 2833 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2834 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2835 ui[0] = 0; 2836 2837 /* jl: linked list for storing indices of the pivot rows 2838 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2839 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2840 for (i=0; i<am; i++){ 2841 jl[i] = am; il[i] = 0; 2842 } 2843 2844 /* create and initialize a linked list for storing column indices of the active row k */ 2845 nlnk = am + 1; 2846 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2847 2848 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2849 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2850 current_space = free_space; 2851 2852 for (k=0; k<am; k++){ /* for each active row k */ 2853 /* initialize lnk by the column indices of row rip[k] of A */ 2854 nzk = 0; 2855 ncols = ai[rip[k]+1] - ai[rip[k]]; 2856 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2857 ncols_upper = 0; 2858 for (j=0; j<ncols; j++){ 2859 i = riip[*(aj + ai[rip[k]] + j)]; 2860 if (i >= k){ /* only take upper triangular entry */ 2861 cols[ncols_upper] = i; 2862 ncols_upper++; 2863 } 2864 } 2865 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2866 nzk += nlnk; 2867 2868 /* update lnk by computing fill-in for each pivot row to be merged in */ 2869 prow = jl[k]; /* 1st pivot row */ 2870 2871 while (prow < k){ 2872 nextprow = jl[prow]; 2873 /* merge prow into k-th row */ 2874 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2875 jmax = ui[prow+1]; 2876 ncols = jmax-jmin; 2877 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2878 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2879 nzk += nlnk; 2880 2881 /* update il and jl for prow */ 2882 if (jmin < jmax){ 2883 il[prow] = jmin; 2884 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2885 } 2886 prow = nextprow; 2887 } 2888 2889 /* if free space is not available, make more free space */ 2890 if (current_space->local_remaining<nzk) { 2891 i = am - k + 1; /* num of unfactored rows */ 2892 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2893 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2894 reallocs++; 2895 } 2896 2897 /* copy data into free space, then initialize lnk */ 2898 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2899 2900 /* add the k-th row into il and jl */ 2901 if (nzk > 1){ 2902 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2903 jl[k] = jl[i]; jl[i] = k; 2904 il[k] = ui[k] + 1; 2905 } 2906 ui_ptr[k] = current_space->array; 2907 current_space->array += nzk; 2908 current_space->local_used += nzk; 2909 current_space->local_remaining -= nzk; 2910 2911 ui[k+1] = ui[k] + nzk; 2912 } 2913 2914 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2915 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2916 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2917 2918 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2919 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2920 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2921 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2922 2923 /* put together the new matrix in MATSEQSBAIJ format */ 2924 2925 b = (Mat_SeqSBAIJ*)fact->data; 2926 b->singlemalloc = PETSC_FALSE; 2927 b->free_a = PETSC_TRUE; 2928 b->free_ij = PETSC_TRUE; 2929 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2930 b->j = uj; 2931 b->i = ui; 2932 b->diag = udiag; 2933 b->free_diag = PETSC_TRUE; 2934 b->ilen = 0; 2935 b->imax = 0; 2936 b->row = perm; 2937 b->col = perm; 2938 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2939 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2940 b->icol = iperm; 2941 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2942 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2943 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2944 b->maxnz = b->nz = ui[am]; 2945 2946 fact->info.factor_mallocs = reallocs; 2947 fact->info.fill_ratio_given = fill; 2948 if (ai[am] != 0) { 2949 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2950 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2951 } else { 2952 fact->info.fill_ratio_needed = 0.0; 2953 } 2954 #if defined(PETSC_USE_INFO) 2955 if (ai[am] != 0) { 2956 PetscReal af = fact->info.fill_ratio_needed; 2957 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2958 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2959 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2960 } else { 2961 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2962 } 2963 #endif 2964 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2965 PetscFunctionReturn(0); 2966 } 2967 2968 #undef __FUNCT__ 2969 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2970 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2971 { 2972 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2973 Mat_SeqSBAIJ *b; 2974 PetscErrorCode ierr; 2975 PetscBool perm_identity; 2976 PetscReal fill = info->fill; 2977 const PetscInt *rip,*riip; 2978 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2979 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2980 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2981 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2982 PetscBT lnkbt; 2983 IS iperm; 2984 2985 PetscFunctionBegin; 2986 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2987 /* check whether perm is the identity mapping */ 2988 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2989 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2990 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2991 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2992 2993 /* initialization */ 2994 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2995 ui[0] = 0; 2996 2997 /* jl: linked list for storing indices of the pivot rows 2998 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2999 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3000 for (i=0; i<am; i++){ 3001 jl[i] = am; il[i] = 0; 3002 } 3003 3004 /* create and initialize a linked list for storing column indices of the active row k */ 3005 nlnk = am + 1; 3006 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3007 3008 /* initial FreeSpace size is fill*(ai[am]+1) */ 3009 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3010 current_space = free_space; 3011 3012 for (k=0; k<am; k++){ /* for each active row k */ 3013 /* initialize lnk by the column indices of row rip[k] of A */ 3014 nzk = 0; 3015 ncols = ai[rip[k]+1] - ai[rip[k]]; 3016 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3017 ncols_upper = 0; 3018 for (j=0; j<ncols; j++){ 3019 i = riip[*(aj + ai[rip[k]] + j)]; 3020 if (i >= k){ /* only take upper triangular entry */ 3021 cols[ncols_upper] = i; 3022 ncols_upper++; 3023 } 3024 } 3025 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3026 nzk += nlnk; 3027 3028 /* update lnk by computing fill-in for each pivot row to be merged in */ 3029 prow = jl[k]; /* 1st pivot row */ 3030 3031 while (prow < k){ 3032 nextprow = jl[prow]; 3033 /* merge prow into k-th row */ 3034 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3035 jmax = ui[prow+1]; 3036 ncols = jmax-jmin; 3037 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3038 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3039 nzk += nlnk; 3040 3041 /* update il and jl for prow */ 3042 if (jmin < jmax){ 3043 il[prow] = jmin; 3044 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3045 } 3046 prow = nextprow; 3047 } 3048 3049 /* if free space is not available, make more free space */ 3050 if (current_space->local_remaining<nzk) { 3051 i = am - k + 1; /* num of unfactored rows */ 3052 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3053 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3054 reallocs++; 3055 } 3056 3057 /* copy data into free space, then initialize lnk */ 3058 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3059 3060 /* add the k-th row into il and jl */ 3061 if (nzk-1 > 0){ 3062 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3063 jl[k] = jl[i]; jl[i] = k; 3064 il[k] = ui[k] + 1; 3065 } 3066 ui_ptr[k] = current_space->array; 3067 current_space->array += nzk; 3068 current_space->local_used += nzk; 3069 current_space->local_remaining -= nzk; 3070 3071 ui[k+1] = ui[k] + nzk; 3072 } 3073 3074 #if defined(PETSC_USE_INFO) 3075 if (ai[am] != 0) { 3076 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3077 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3078 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3079 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3080 } else { 3081 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3082 } 3083 #endif 3084 3085 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3086 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3087 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3088 3089 /* destroy list of free space and other temporary array(s) */ 3090 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3091 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3092 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3093 3094 /* put together the new matrix in MATSEQSBAIJ format */ 3095 3096 b = (Mat_SeqSBAIJ*)fact->data; 3097 b->singlemalloc = PETSC_FALSE; 3098 b->free_a = PETSC_TRUE; 3099 b->free_ij = PETSC_TRUE; 3100 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3101 b->j = uj; 3102 b->i = ui; 3103 b->diag = 0; 3104 b->ilen = 0; 3105 b->imax = 0; 3106 b->row = perm; 3107 b->col = perm; 3108 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3109 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3110 b->icol = iperm; 3111 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3112 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3113 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3114 b->maxnz = b->nz = ui[am]; 3115 3116 fact->info.factor_mallocs = reallocs; 3117 fact->info.fill_ratio_given = fill; 3118 if (ai[am] != 0) { 3119 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3120 } else { 3121 fact->info.fill_ratio_needed = 0.0; 3122 } 3123 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3124 PetscFunctionReturn(0); 3125 } 3126 3127 #undef __FUNCT__ 3128 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3129 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3130 { 3131 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3132 PetscErrorCode ierr; 3133 PetscInt n = A->rmap->n; 3134 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3135 PetscScalar *x,sum; 3136 const PetscScalar *b; 3137 const MatScalar *aa = a->a,*v; 3138 PetscInt i,nz; 3139 3140 PetscFunctionBegin; 3141 if (!n) PetscFunctionReturn(0); 3142 3143 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3144 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3145 3146 /* forward solve the lower triangular */ 3147 x[0] = b[0]; 3148 v = aa; 3149 vi = aj; 3150 for (i=1; i<n; i++) { 3151 nz = ai[i+1] - ai[i]; 3152 sum = b[i]; 3153 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3154 v += nz; 3155 vi += nz; 3156 x[i] = sum; 3157 } 3158 3159 /* backward solve the upper triangular */ 3160 for (i=n-1; i>=0; i--){ 3161 v = aa + adiag[i+1] + 1; 3162 vi = aj + adiag[i+1] + 1; 3163 nz = adiag[i] - adiag[i+1]-1; 3164 sum = x[i]; 3165 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3166 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3167 } 3168 3169 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3170 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3171 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3172 PetscFunctionReturn(0); 3173 } 3174 3175 #undef __FUNCT__ 3176 #define __FUNCT__ "MatSolve_SeqAIJ" 3177 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3178 { 3179 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3180 IS iscol = a->col,isrow = a->row; 3181 PetscErrorCode ierr; 3182 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3183 const PetscInt *rout,*cout,*r,*c; 3184 PetscScalar *x,*tmp,sum; 3185 const PetscScalar *b; 3186 const MatScalar *aa = a->a,*v; 3187 3188 PetscFunctionBegin; 3189 if (!n) PetscFunctionReturn(0); 3190 3191 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3193 tmp = a->solve_work; 3194 3195 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3196 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3197 3198 /* forward solve the lower triangular */ 3199 tmp[0] = b[r[0]]; 3200 v = aa; 3201 vi = aj; 3202 for (i=1; i<n; i++) { 3203 nz = ai[i+1] - ai[i]; 3204 sum = b[r[i]]; 3205 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3206 tmp[i] = sum; 3207 v += nz; vi += nz; 3208 } 3209 3210 /* backward solve the upper triangular */ 3211 for (i=n-1; i>=0; i--){ 3212 v = aa + adiag[i+1]+1; 3213 vi = aj + adiag[i+1]+1; 3214 nz = adiag[i]-adiag[i+1]-1; 3215 sum = tmp[i]; 3216 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3217 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3218 } 3219 3220 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3221 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3222 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3223 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3224 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3225 PetscFunctionReturn(0); 3226 } 3227 3228 #undef __FUNCT__ 3229 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3230 /* 3231 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3232 */ 3233 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3234 { 3235 Mat B = *fact; 3236 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3237 IS isicol; 3238 PetscErrorCode ierr; 3239 const PetscInt *r,*ic; 3240 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3241 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3242 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3243 PetscInt nlnk,*lnk; 3244 PetscBT lnkbt; 3245 PetscBool row_identity,icol_identity; 3246 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3247 const PetscInt *ics; 3248 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3249 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3250 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3251 PetscBool missing; 3252 3253 PetscFunctionBegin; 3254 3255 if (dt == PETSC_DEFAULT) dt = 0.005; 3256 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3257 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3258 3259 /* ------- symbolic factorization, can be reused ---------*/ 3260 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3261 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3262 adiag=a->diag; 3263 3264 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3265 3266 /* bdiag is location of diagonal in factor */ 3267 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3268 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3269 3270 /* allocate row pointers bi */ 3271 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3272 3273 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3274 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3275 nnz_max = ai[n]+2*n*dtcount+2; 3276 3277 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3278 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3279 3280 /* put together the new matrix */ 3281 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3282 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3283 b = (Mat_SeqAIJ*)B->data; 3284 b->free_a = PETSC_TRUE; 3285 b->free_ij = PETSC_TRUE; 3286 b->singlemalloc = PETSC_FALSE; 3287 b->a = ba; 3288 b->j = bj; 3289 b->i = bi; 3290 b->diag = bdiag; 3291 b->ilen = 0; 3292 b->imax = 0; 3293 b->row = isrow; 3294 b->col = iscol; 3295 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3296 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3297 b->icol = isicol; 3298 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3299 3300 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3301 b->maxnz = nnz_max; 3302 3303 B->factortype = MAT_FACTOR_ILUDT; 3304 B->info.factor_mallocs = 0; 3305 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3306 CHKMEMQ; 3307 /* ------- end of symbolic factorization ---------*/ 3308 3309 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3310 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3311 ics = ic; 3312 3313 /* linked list for storing column indices of the active row */ 3314 nlnk = n + 1; 3315 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3316 3317 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3318 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3319 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3320 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3321 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3322 3323 bi[0] = 0; 3324 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3325 bdiag_rev[n] = bdiag[0]; 3326 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3327 for (i=0; i<n; i++) { 3328 /* copy initial fill into linked list */ 3329 nzi = 0; /* nonzeros for active row i */ 3330 nzi = ai[r[i]+1] - ai[r[i]]; 3331 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3332 nzi_al = adiag[r[i]] - ai[r[i]]; 3333 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3334 ajtmp = aj + ai[r[i]]; 3335 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3336 3337 /* load in initial (unfactored row) */ 3338 aatmp = a->a + ai[r[i]]; 3339 for (j=0; j<nzi; j++) { 3340 rtmp[ics[*ajtmp++]] = *aatmp++; 3341 } 3342 3343 /* add pivot rows into linked list */ 3344 row = lnk[n]; 3345 while (row < i ) { 3346 nzi_bl = bi[row+1] - bi[row] + 1; 3347 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3348 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3349 nzi += nlnk; 3350 row = lnk[row]; 3351 } 3352 3353 /* copy data from lnk into jtmp, then initialize lnk */ 3354 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3355 3356 /* numerical factorization */ 3357 bjtmp = jtmp; 3358 row = *bjtmp++; /* 1st pivot row */ 3359 while ( row < i ) { 3360 pc = rtmp + row; 3361 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3362 multiplier = (*pc) * (*pv); 3363 *pc = multiplier; 3364 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3365 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3366 pv = ba + bdiag[row+1] + 1; 3367 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3368 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3369 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3370 ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); 3371 } 3372 row = *bjtmp++; 3373 } 3374 3375 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3376 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3377 nzi_bl = 0; j = 0; 3378 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3379 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3380 nzi_bl++; j++; 3381 } 3382 nzi_bu = nzi - nzi_bl -1; 3383 while (j < nzi){ 3384 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3385 j++; 3386 } 3387 3388 bjtmp = bj + bi[i]; 3389 batmp = ba + bi[i]; 3390 /* apply level dropping rule to L part */ 3391 ncut = nzi_al + dtcount; 3392 if (ncut < nzi_bl){ 3393 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3394 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3395 } else { 3396 ncut = nzi_bl; 3397 } 3398 for (j=0; j<ncut; j++){ 3399 bjtmp[j] = jtmp[j]; 3400 batmp[j] = vtmp[j]; 3401 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3402 } 3403 bi[i+1] = bi[i] + ncut; 3404 nzi = ncut + 1; 3405 3406 /* apply level dropping rule to U part */ 3407 ncut = nzi_au + dtcount; 3408 if (ncut < nzi_bu){ 3409 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3410 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3411 } else { 3412 ncut = nzi_bu; 3413 } 3414 nzi += ncut; 3415 3416 /* mark bdiagonal */ 3417 bdiag[i+1] = bdiag[i] - (ncut + 1); 3418 bdiag_rev[n-i-1] = bdiag[i+1]; 3419 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3420 bjtmp = bj + bdiag[i]; 3421 batmp = ba + bdiag[i]; 3422 *bjtmp = i; 3423 *batmp = diag_tmp; /* rtmp[i]; */ 3424 if (*batmp == 0.0) { 3425 *batmp = dt+shift; 3426 /* printf(" row %d add shift %g\n",i,shift); */ 3427 } 3428 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3429 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3430 3431 bjtmp = bj + bdiag[i+1]+1; 3432 batmp = ba + bdiag[i+1]+1; 3433 for (k=0; k<ncut; k++){ 3434 bjtmp[k] = jtmp[nzi_bl+1+k]; 3435 batmp[k] = vtmp[nzi_bl+1+k]; 3436 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3437 } 3438 /* printf("\n"); */ 3439 3440 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3441 /* 3442 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3443 printf(" ----------------------------\n"); 3444 */ 3445 } /* for (i=0; i<n; i++) */ 3446 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3447 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3448 3449 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3450 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3451 3452 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3453 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3454 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3455 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3456 3457 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3458 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3459 3460 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3461 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3462 if (row_identity && icol_identity) { 3463 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3464 } else { 3465 B->ops->solve = MatSolve_SeqAIJ; 3466 } 3467 3468 B->ops->solveadd = 0; 3469 B->ops->solvetranspose = 0; 3470 B->ops->solvetransposeadd = 0; 3471 B->ops->matsolve = 0; 3472 B->assembled = PETSC_TRUE; 3473 B->preallocated = PETSC_TRUE; 3474 PetscFunctionReturn(0); 3475 } 3476 3477 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3478 #undef __FUNCT__ 3479 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3480 /* 3481 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3482 */ 3483 3484 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3485 { 3486 PetscErrorCode ierr; 3487 3488 PetscFunctionBegin; 3489 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3490 PetscFunctionReturn(0); 3491 } 3492 3493 /* 3494 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3495 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3496 */ 3497 #undef __FUNCT__ 3498 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3499 /* 3500 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3501 */ 3502 3503 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3504 { 3505 Mat C=fact; 3506 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3507 IS isrow = b->row,isicol = b->icol; 3508 PetscErrorCode ierr; 3509 const PetscInt *r,*ic,*ics; 3510 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3511 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3512 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3513 PetscReal dt=info->dt,shift=info->shiftamount; 3514 PetscBool row_identity, col_identity; 3515 3516 PetscFunctionBegin; 3517 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3518 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3519 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3520 ics = ic; 3521 3522 for (i=0; i<n; i++){ 3523 /* initialize rtmp array */ 3524 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3525 bjtmp = bj + bi[i]; 3526 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3527 rtmp[i] = 0.0; 3528 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3529 bjtmp = bj + bdiag[i+1] + 1; 3530 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3531 3532 /* load in initial unfactored row of A */ 3533 /* printf("row %d\n",i); */ 3534 nz = ai[r[i]+1] - ai[r[i]]; 3535 ajtmp = aj + ai[r[i]]; 3536 v = aa + ai[r[i]]; 3537 for (j=0; j<nz; j++) { 3538 rtmp[ics[*ajtmp++]] = v[j]; 3539 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3540 } 3541 /* printf("\n"); */ 3542 3543 /* numerical factorization */ 3544 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3545 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3546 k = 0; 3547 while (k < nzl){ 3548 row = *bjtmp++; 3549 /* printf(" prow %d\n",row); */ 3550 pc = rtmp + row; 3551 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3552 multiplier = (*pc) * (*pv); 3553 *pc = multiplier; 3554 if (PetscAbsScalar(multiplier) > dt){ 3555 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3556 pv = b->a + bdiag[row+1] + 1; 3557 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3558 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3559 /* ierr = PetscLogFlops(2.0*nz);CHKERRQ(ierr); */ 3560 } 3561 k++; 3562 } 3563 3564 /* finished row so stick it into b->a */ 3565 /* L-part */ 3566 pv = b->a + bi[i] ; 3567 pj = bj + bi[i] ; 3568 nzl = bi[i+1] - bi[i]; 3569 for (j=0; j<nzl; j++) { 3570 pv[j] = rtmp[pj[j]]; 3571 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3572 } 3573 3574 /* diagonal: invert diagonal entries for simplier triangular solves */ 3575 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3576 b->a[bdiag[i]] = 1.0/rtmp[i]; 3577 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3578 3579 /* U-part */ 3580 pv = b->a + bdiag[i+1] + 1; 3581 pj = bj + bdiag[i+1] + 1; 3582 nzu = bdiag[i] - bdiag[i+1] - 1; 3583 for (j=0; j<nzu; j++) { 3584 pv[j] = rtmp[pj[j]]; 3585 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3586 } 3587 /* printf("\n"); */ 3588 } 3589 3590 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3591 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3592 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3593 3594 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3595 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3596 if (row_identity && col_identity) { 3597 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3598 } else { 3599 C->ops->solve = MatSolve_SeqAIJ; 3600 } 3601 C->ops->solveadd = 0; 3602 C->ops->solvetranspose = 0; 3603 C->ops->solvetransposeadd = 0; 3604 C->ops->matsolve = 0; 3605 C->assembled = PETSC_TRUE; 3606 C->preallocated = PETSC_TRUE; 3607 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3608 PetscFunctionReturn(0); 3609 } 3610