1 2 #include <../src/mat/impls/aij/seq/aij.h> 3 #include <../src/mat/impls/sbaij/seq/sbaij.h> 4 #include <petscbt.h> 5 #include <../src/mat/utils/freespace.h> 6 7 EXTERN_C_BEGIN 8 #undef __FUNCT__ 9 #define __FUNCT__ "MatGetOrdering_Flow_SeqAIJ" 10 /* 11 Computes an ordering to get most of the large numerical values in the lower triangular part of the matrix 12 13 This code does not work and is not called anywhere. It would be registered with MatOrderingRegisterAll() 14 */ 15 PetscErrorCode MatGetOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol) 16 { 17 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->data; 18 PetscErrorCode ierr; 19 PetscInt i,j,jj,k, kk,n = mat->rmap->n, current = 0, newcurrent = 0,*order; 20 const PetscInt *ai = a->i, *aj = a->j; 21 const PetscScalar *aa = a->a; 22 PetscBool *done; 23 PetscReal best,past = 0,future; 24 25 PetscFunctionBegin; 26 /* pick initial row */ 27 best = -1; 28 for (i=0; i<n; i++) { 29 future = 0.0; 30 for (j=ai[i]; j<ai[i+1]; j++) { 31 if (aj[j] != i) future += PetscAbsScalar(aa[j]); else past = PetscAbsScalar(aa[j]); 32 } 33 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 34 if (past/future > best) { 35 best = past/future; 36 current = i; 37 } 38 } 39 40 ierr = PetscMalloc(n*sizeof(PetscBool),&done);CHKERRQ(ierr); 41 ierr = PetscMemzero(done,n*sizeof(PetscBool));CHKERRQ(ierr); 42 ierr = PetscMalloc(n*sizeof(PetscInt),&order);CHKERRQ(ierr); 43 order[0] = current; 44 for (i=0; i<n-1; i++) { 45 done[current] = PETSC_TRUE; 46 best = -1; 47 /* loop over all neighbors of current pivot */ 48 for (j=ai[current]; j<ai[current+1]; j++) { 49 jj = aj[j]; 50 if (done[jj]) continue; 51 /* loop over columns of potential next row computing weights for below and above diagonal */ 52 past = future = 0.0; 53 for (k=ai[jj]; k<ai[jj+1]; k++) { 54 kk = aj[k]; 55 if (done[kk]) past += PetscAbsScalar(aa[k]); 56 else if (kk != jj) future += PetscAbsScalar(aa[k]); 57 } 58 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 59 if (past/future > best) { 60 best = past/future; 61 newcurrent = jj; 62 } 63 } 64 if (best == -1) { /* no neighbors to select from so select best of all that remain */ 65 best = -1; 66 for (k=0; k<n; k++) { 67 if (done[k]) continue; 68 future = 0.0; 69 past = 0.0; 70 for (j=ai[k]; j<ai[k+1]; j++) { 71 kk = aj[j]; 72 if (done[kk]) past += PetscAbsScalar(aa[j]); 73 else if (kk != k) future += PetscAbsScalar(aa[j]); 74 } 75 if (!future) future = 1.e-10; /* if there is zero in the upper diagonal part want to rank this row high */ 76 if (past/future > best) { 77 best = past/future; 78 newcurrent = k; 79 } 80 } 81 } 82 if (current == newcurrent) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"newcurrent cannot be current"); 83 current = newcurrent; 84 order[i+1] = current; 85 } 86 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,order,PETSC_COPY_VALUES,irow);CHKERRQ(ierr); 87 *icol = *irow; 88 ierr = PetscObjectReference((PetscObject)*irow);CHKERRQ(ierr); 89 ierr = PetscFree(done);CHKERRQ(ierr); 90 ierr = PetscFree(order);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 EXTERN_C_END 94 95 EXTERN_C_BEGIN 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatGetFactorAvailable_seqaij_petsc" 98 PetscErrorCode MatGetFactorAvailable_seqaij_petsc(Mat A,MatFactorType ftype,PetscBool *flg) 99 { 100 PetscFunctionBegin; 101 *flg = PETSC_TRUE; 102 PetscFunctionReturn(0); 103 } 104 EXTERN_C_END 105 106 EXTERN_C_BEGIN 107 #undef __FUNCT__ 108 #define __FUNCT__ "MatGetFactor_seqaij_petsc" 109 PetscErrorCode MatGetFactor_seqaij_petsc(Mat A,MatFactorType ftype,Mat *B) 110 { 111 PetscInt n = A->rmap->n; 112 PetscErrorCode ierr; 113 114 PetscFunctionBegin; 115 ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr); 116 ierr = MatSetSizes(*B,n,n,n,n);CHKERRQ(ierr); 117 if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU || ftype == MAT_FACTOR_ILUDT){ 118 ierr = MatSetType(*B,MATSEQAIJ);CHKERRQ(ierr); 119 (*B)->ops->ilufactorsymbolic = MatILUFactorSymbolic_SeqAIJ; 120 (*B)->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ; 121 ierr = MatSetBlockSizes(*B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 122 } else if (ftype == MAT_FACTOR_CHOLESKY || ftype == MAT_FACTOR_ICC) { 123 ierr = MatSetType(*B,MATSEQSBAIJ);CHKERRQ(ierr); 124 ierr = MatSeqSBAIJSetPreallocation(*B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 125 (*B)->ops->iccfactorsymbolic = MatICCFactorSymbolic_SeqAIJ; 126 (*B)->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SeqAIJ; 127 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 128 (*B)->factortype = ftype; 129 PetscFunctionReturn(0); 130 } 131 EXTERN_C_END 132 133 #undef __FUNCT__ 134 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_inplace" 135 PetscErrorCode MatLUFactorSymbolic_SeqAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 136 { 137 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 138 IS isicol; 139 PetscErrorCode ierr; 140 const PetscInt *r,*ic; 141 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 142 PetscInt *bi,*bj,*ajtmp; 143 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 144 PetscReal f; 145 PetscInt nlnk,*lnk,k,**bi_ptr; 146 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 147 PetscBT lnkbt; 148 149 PetscFunctionBegin; 150 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 151 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 152 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 153 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 154 155 /* get new row pointers */ 156 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 157 bi[0] = 0; 158 159 /* bdiag is location of diagonal in factor */ 160 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 161 bdiag[0] = 0; 162 163 /* linked list for storing column indices of the active row */ 164 nlnk = n + 1; 165 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 166 167 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 168 169 /* initial FreeSpace size is f*(ai[n]+1) */ 170 f = info->fill; 171 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 172 current_space = free_space; 173 174 for (i=0; i<n; i++) { 175 /* copy previous fill into linked list */ 176 nzi = 0; 177 nnz = ai[r[i]+1] - ai[r[i]]; 178 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 179 ajtmp = aj + ai[r[i]]; 180 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 181 nzi += nlnk; 182 183 /* add pivot rows into linked list */ 184 row = lnk[n]; 185 while (row < i) { 186 nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 187 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 188 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 189 nzi += nlnk; 190 row = lnk[row]; 191 } 192 bi[i+1] = bi[i] + nzi; 193 im[i] = nzi; 194 195 /* mark bdiag */ 196 nzbd = 0; 197 nnz = nzi; 198 k = lnk[n]; 199 while (nnz-- && k < i){ 200 nzbd++; 201 k = lnk[k]; 202 } 203 bdiag[i] = bi[i] + nzbd; 204 205 /* if free space is not available, make more free space */ 206 if (current_space->local_remaining<nzi) { 207 nnz = (n - i)*nzi; /* estimated and max additional space needed */ 208 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 209 reallocs++; 210 } 211 212 /* copy data into free space, then initialize lnk */ 213 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 214 bi_ptr[i] = current_space->array; 215 current_space->array += nzi; 216 current_space->local_used += nzi; 217 current_space->local_remaining -= nzi; 218 } 219 #if defined(PETSC_USE_INFO) 220 if (ai[n] != 0) { 221 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 222 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 223 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 224 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 225 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 226 } else { 227 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 228 } 229 #endif 230 231 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 232 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 233 234 /* destroy list of free space and other temporary array(s) */ 235 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 236 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 237 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 238 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 239 240 /* put together the new matrix */ 241 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 242 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 243 b = (Mat_SeqAIJ*)(B)->data; 244 b->free_a = PETSC_TRUE; 245 b->free_ij = PETSC_TRUE; 246 b->singlemalloc = PETSC_FALSE; 247 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 248 b->j = bj; 249 b->i = bi; 250 b->diag = bdiag; 251 b->ilen = 0; 252 b->imax = 0; 253 b->row = isrow; 254 b->col = iscol; 255 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 256 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 257 b->icol = isicol; 258 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 259 260 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 261 ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 262 b->maxnz = b->nz = bi[n] ; 263 264 (B)->factortype = MAT_FACTOR_LU; 265 (B)->info.factor_mallocs = reallocs; 266 (B)->info.fill_ratio_given = f; 267 268 if (ai[n]) { 269 (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 270 } else { 271 (B)->info.fill_ratio_needed = 0.0; 272 } 273 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 274 if (a->inode.size) { 275 (B)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 276 } 277 PetscFunctionReturn(0); 278 } 279 280 #undef __FUNCT__ 281 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ" 282 PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 283 { 284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 285 IS isicol; 286 PetscErrorCode ierr; 287 const PetscInt *r,*ic; 288 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j; 289 PetscInt *bi,*bj,*ajtmp; 290 PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 291 PetscReal f; 292 PetscInt nlnk,*lnk,k,**bi_ptr; 293 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 294 PetscBT lnkbt; 295 296 PetscFunctionBegin; 297 /* Uncomment the oldatastruct part only while testing new data structure for MatSolve() */ 298 /* 299 PetscBool olddatastruct=PETSC_FALSE; 300 ierr = PetscOptionsGetBool(PETSC_NULL,"-lu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 301 if(olddatastruct){ 302 ierr = MatLUFactorSymbolic_SeqAIJ_inplace(B,A,isrow,iscol,info);CHKERRQ(ierr); 303 PetscFunctionReturn(0); 304 } 305 */ 306 if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 307 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 308 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 309 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 310 311 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 312 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 313 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 314 bi[0] = bdiag[0] = 0; 315 316 /* linked list for storing column indices of the active row */ 317 nlnk = n + 1; 318 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 319 320 ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr); 321 322 /* initial FreeSpace size is f*(ai[n]+1) */ 323 f = info->fill; 324 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 325 current_space = free_space; 326 327 for (i=0; i<n; i++) { 328 /* copy previous fill into linked list */ 329 nzi = 0; 330 nnz = ai[r[i]+1] - ai[r[i]]; 331 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 332 ajtmp = aj + ai[r[i]]; 333 ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 334 nzi += nlnk; 335 336 /* add pivot rows into linked list */ 337 row = lnk[n]; 338 while (row < i){ 339 nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 340 ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 341 ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr); 342 nzi += nlnk; 343 row = lnk[row]; 344 } 345 bi[i+1] = bi[i] + nzi; 346 im[i] = nzi; 347 348 /* mark bdiag */ 349 nzbd = 0; 350 nnz = nzi; 351 k = lnk[n]; 352 while (nnz-- && k < i){ 353 nzbd++; 354 k = lnk[k]; 355 } 356 bdiag[i] = nzbd; /* note: bdiag[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 357 358 /* if free space is not available, make more free space */ 359 if (current_space->local_remaining<nzi) { 360 nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */ 361 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 362 reallocs++; 363 } 364 365 /* copy data into free space, then initialize lnk */ 366 ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 367 bi_ptr[i] = current_space->array; 368 current_space->array += nzi; 369 current_space->local_used += nzi; 370 current_space->local_remaining -= nzi; 371 } 372 373 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 374 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 375 376 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 377 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 378 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 379 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 380 ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr); 381 382 /* put together the new matrix */ 383 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 384 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 385 b = (Mat_SeqAIJ*)(B)->data; 386 b->free_a = PETSC_TRUE; 387 b->free_ij = PETSC_TRUE; 388 b->singlemalloc = PETSC_FALSE; 389 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 390 b->j = bj; 391 b->i = bi; 392 b->diag = bdiag; 393 b->ilen = 0; 394 b->imax = 0; 395 b->row = isrow; 396 b->col = iscol; 397 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 398 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 399 b->icol = isicol; 400 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 401 402 /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */ 403 ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 404 b->maxnz = b->nz = bdiag[0]+1; 405 B->factortype = MAT_FACTOR_LU; 406 B->info.factor_mallocs = reallocs; 407 B->info.fill_ratio_given = f; 408 409 if (ai[n]) { 410 B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 411 } else { 412 B->info.fill_ratio_needed = 0.0; 413 } 414 #if defined(PETSC_USE_INFO) 415 if (ai[n] != 0) { 416 PetscReal af = B->info.fill_ratio_needed; 417 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 418 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 419 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr); 420 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 421 } else { 422 ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr); 423 } 424 #endif 425 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 426 if (a->inode.size) { 427 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 428 } 429 PetscFunctionReturn(0); 430 } 431 432 /* 433 Trouble in factorization, should we dump the original matrix? 434 */ 435 #undef __FUNCT__ 436 #define __FUNCT__ "MatFactorDumpMatrix" 437 PetscErrorCode MatFactorDumpMatrix(Mat A) 438 { 439 PetscErrorCode ierr; 440 PetscBool flg = PETSC_FALSE; 441 442 PetscFunctionBegin; 443 ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_factor_dump_on_error",&flg,PETSC_NULL);CHKERRQ(ierr); 444 if (flg) { 445 PetscViewer viewer; 446 char filename[PETSC_MAX_PATH_LEN]; 447 448 ierr = PetscSNPrintf(filename,PETSC_MAX_PATH_LEN,"matrix_factor_error.%d",PetscGlobalRank);CHKERRQ(ierr); 449 ierr = PetscViewerBinaryOpen(((PetscObject)A)->comm,filename,FILE_MODE_WRITE,&viewer);CHKERRQ(ierr); 450 ierr = MatView(A,viewer);CHKERRQ(ierr); 451 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 452 } 453 PetscFunctionReturn(0); 454 } 455 456 #undef __FUNCT__ 457 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ" 458 PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 459 { 460 Mat C=B; 461 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 462 IS isrow = b->row,isicol = b->icol; 463 PetscErrorCode ierr; 464 const PetscInt *r,*ic,*ics; 465 const PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*bdiag=b->diag; 466 PetscInt i,j,k,nz,nzL,row,*pj; 467 const PetscInt *ajtmp,*bjtmp; 468 MatScalar *rtmp,*pc,multiplier,*pv; 469 const MatScalar *aa=a->a,*v; 470 PetscBool row_identity,col_identity; 471 FactorShiftCtx sctx; 472 const PetscInt *ddiag; 473 PetscReal rs; 474 MatScalar d; 475 476 PetscFunctionBegin; 477 /* MatPivotSetUp(): initialize shift context sctx */ 478 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 479 480 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 481 ddiag = a->diag; 482 sctx.shift_top = info->zeropivot; 483 for (i=0; i<n; i++) { 484 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 485 d = (aa)[ddiag[i]]; 486 rs = -PetscAbsScalar(d) - PetscRealPart(d); 487 v = aa+ai[i]; 488 nz = ai[i+1] - ai[i]; 489 for (j=0; j<nz; j++) 490 rs += PetscAbsScalar(v[j]); 491 if (rs>sctx.shift_top) sctx.shift_top = rs; 492 } 493 sctx.shift_top *= 1.1; 494 sctx.nshift_max = 5; 495 sctx.shift_lo = 0.; 496 sctx.shift_hi = 1.; 497 } 498 499 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 500 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 501 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 502 ics = ic; 503 504 do { 505 sctx.newshift = PETSC_FALSE; 506 for (i=0; i<n; i++){ 507 /* zero rtmp */ 508 /* L part */ 509 nz = bi[i+1] - bi[i]; 510 bjtmp = bj + bi[i]; 511 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 512 513 /* U part */ 514 nz = bdiag[i]-bdiag[i+1]; 515 bjtmp = bj + bdiag[i+1]+1; 516 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 517 518 /* load in initial (unfactored row) */ 519 nz = ai[r[i]+1] - ai[r[i]]; 520 ajtmp = aj + ai[r[i]]; 521 v = aa + ai[r[i]]; 522 for (j=0; j<nz; j++) { 523 rtmp[ics[ajtmp[j]]] = v[j]; 524 } 525 /* ZeropivotApply() */ 526 rtmp[i] += sctx.shift_amount; /* shift the diagonal of the matrix */ 527 528 /* elimination */ 529 bjtmp = bj + bi[i]; 530 row = *bjtmp++; 531 nzL = bi[i+1] - bi[i]; 532 for(k=0; k < nzL;k++) { 533 pc = rtmp + row; 534 if (*pc != 0.0) { 535 pv = b->a + bdiag[row]; 536 multiplier = *pc * (*pv); 537 *pc = multiplier; 538 pj = b->j + bdiag[row+1]+1; /* beginning of U(row,:) */ 539 pv = b->a + bdiag[row+1]+1; 540 nz = bdiag[row]-bdiag[row+1]-1; /* num of entries in U(row,:) excluding diag */ 541 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 542 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 543 } 544 row = *bjtmp++; 545 } 546 547 /* finished row so stick it into b->a */ 548 rs = 0.0; 549 /* L part */ 550 pv = b->a + bi[i] ; 551 pj = b->j + bi[i] ; 552 nz = bi[i+1] - bi[i]; 553 for (j=0; j<nz; j++) { 554 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 555 } 556 557 /* U part */ 558 pv = b->a + bdiag[i+1]+1; 559 pj = b->j + bdiag[i+1]+1; 560 nz = bdiag[i] - bdiag[i+1]-1; 561 for (j=0; j<nz; j++) { 562 pv[j] = rtmp[pj[j]]; rs += PetscAbsScalar(pv[j]); 563 } 564 565 sctx.rs = rs; 566 sctx.pv = rtmp[i]; 567 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 568 if(sctx.newshift) break; /* break for-loop */ 569 rtmp[i] = sctx.pv; /* sctx.pv might be updated in the case of MAT_SHIFT_INBLOCKS */ 570 571 /* Mark diagonal and invert diagonal for simplier triangular solves */ 572 pv = b->a + bdiag[i]; 573 *pv = 1.0/rtmp[i]; 574 575 } /* endof for (i=0; i<n; i++){ */ 576 577 /* MatPivotRefine() */ 578 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max){ 579 /* 580 * if no shift in this attempt & shifting & started shifting & can refine, 581 * then try lower shift 582 */ 583 sctx.shift_hi = sctx.shift_fraction; 584 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 585 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 586 sctx.newshift = PETSC_TRUE; 587 sctx.nshift++; 588 } 589 } while (sctx.newshift); 590 591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 594 595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 597 if (row_identity && col_identity) { 598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 599 } else { 600 C->ops->solve = MatSolve_SeqAIJ; 601 } 602 C->ops->solveadd = MatSolveAdd_SeqAIJ; 603 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ; 604 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ; 605 C->ops->matsolve = MatMatSolve_SeqAIJ; 606 C->assembled = PETSC_TRUE; 607 C->preallocated = PETSC_TRUE; 608 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 609 610 /* MatShiftView(A,info,&sctx) */ 611 if (sctx.nshift){ 612 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 613 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 614 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 615 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 616 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 617 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 618 } 619 } 620 ierr = Mat_CheckInode_FactorLU(C,PETSC_FALSE);CHKERRQ(ierr); 621 PetscFunctionReturn(0); 622 } 623 624 #undef __FUNCT__ 625 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_inplace" 626 PetscErrorCode MatLUFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 627 { 628 Mat C=B; 629 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 630 IS isrow = b->row,isicol = b->icol; 631 PetscErrorCode ierr; 632 const PetscInt *r,*ic,*ics; 633 PetscInt nz,row,i,j,n=A->rmap->n,diag; 634 const PetscInt *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 635 const PetscInt *ajtmp,*bjtmp,*diag_offset = b->diag,*pj; 636 MatScalar *pv,*rtmp,*pc,multiplier,d; 637 const MatScalar *v,*aa=a->a; 638 PetscReal rs=0.0; 639 FactorShiftCtx sctx; 640 const PetscInt *ddiag; 641 PetscBool row_identity, col_identity; 642 643 PetscFunctionBegin; 644 /* MatPivotSetUp(): initialize shift context sctx */ 645 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 646 647 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 648 ddiag = a->diag; 649 sctx.shift_top = info->zeropivot; 650 for (i=0; i<n; i++) { 651 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 652 d = (aa)[ddiag[i]]; 653 rs = -PetscAbsScalar(d) - PetscRealPart(d); 654 v = aa+ai[i]; 655 nz = ai[i+1] - ai[i]; 656 for (j=0; j<nz; j++) 657 rs += PetscAbsScalar(v[j]); 658 if (rs>sctx.shift_top) sctx.shift_top = rs; 659 } 660 sctx.shift_top *= 1.1; 661 sctx.nshift_max = 5; 662 sctx.shift_lo = 0.; 663 sctx.shift_hi = 1.; 664 } 665 666 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 667 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 668 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 669 ics = ic; 670 671 do { 672 sctx.newshift = PETSC_FALSE; 673 for (i=0; i<n; i++){ 674 nz = bi[i+1] - bi[i]; 675 bjtmp = bj + bi[i]; 676 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 677 678 /* load in initial (unfactored row) */ 679 nz = ai[r[i]+1] - ai[r[i]]; 680 ajtmp = aj + ai[r[i]]; 681 v = aa + ai[r[i]]; 682 for (j=0; j<nz; j++) { 683 rtmp[ics[ajtmp[j]]] = v[j]; 684 } 685 rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 686 687 row = *bjtmp++; 688 while (row < i) { 689 pc = rtmp + row; 690 if (*pc != 0.0) { 691 pv = b->a + diag_offset[row]; 692 pj = b->j + diag_offset[row] + 1; 693 multiplier = *pc / *pv++; 694 *pc = multiplier; 695 nz = bi[row+1] - diag_offset[row] - 1; 696 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 697 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 698 } 699 row = *bjtmp++; 700 } 701 /* finished row so stick it into b->a */ 702 pv = b->a + bi[i] ; 703 pj = b->j + bi[i] ; 704 nz = bi[i+1] - bi[i]; 705 diag = diag_offset[i] - bi[i]; 706 rs = 0.0; 707 for (j=0; j<nz; j++) { 708 pv[j] = rtmp[pj[j]]; 709 rs += PetscAbsScalar(pv[j]); 710 } 711 rs -= PetscAbsScalar(pv[diag]); 712 713 sctx.rs = rs; 714 sctx.pv = pv[diag]; 715 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 716 if (sctx.newshift) break; 717 pv[diag] = sctx.pv; 718 } 719 720 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 721 /* 722 * if no shift in this attempt & shifting & started shifting & can refine, 723 * then try lower shift 724 */ 725 sctx.shift_hi = sctx.shift_fraction; 726 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 727 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 728 sctx.newshift = PETSC_TRUE; 729 sctx.nshift++; 730 } 731 } while (sctx.newshift); 732 733 /* invert diagonal entries for simplier triangular solves */ 734 for (i=0; i<n; i++) { 735 b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]]; 736 } 737 ierr = PetscFree(rtmp);CHKERRQ(ierr); 738 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 739 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 740 741 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 742 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 743 if (row_identity && col_identity) { 744 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering_inplace; 745 } else { 746 C->ops->solve = MatSolve_SeqAIJ_inplace; 747 } 748 C->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 749 C->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 750 C->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 751 C->ops->matsolve = MatMatSolve_SeqAIJ_inplace; 752 C->assembled = PETSC_TRUE; 753 C->preallocated = PETSC_TRUE; 754 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 755 if (sctx.nshift){ 756 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 757 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 758 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 759 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 760 } 761 } 762 (C)->ops->solve = MatSolve_SeqAIJ_inplace; 763 (C)->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 764 ierr = Mat_CheckInode(C,PETSC_FALSE);CHKERRQ(ierr); 765 PetscFunctionReturn(0); 766 } 767 768 /* 769 This routine implements inplace ILU(0) with row or/and column permutations. 770 Input: 771 A - original matrix 772 Output; 773 A - a->i (rowptr) is same as original rowptr, but factored i-the row is stored in rowperm[i] 774 a->j (col index) is permuted by the inverse of colperm, then sorted 775 a->a reordered accordingly with a->j 776 a->diag (ptr to diagonal elements) is updated. 777 */ 778 #undef __FUNCT__ 779 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_InplaceWithPerm" 780 PetscErrorCode MatLUFactorNumeric_SeqAIJ_InplaceWithPerm(Mat B,Mat A,const MatFactorInfo *info) 781 { 782 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 783 IS isrow = a->row,isicol = a->icol; 784 PetscErrorCode ierr; 785 const PetscInt *r,*ic,*ics; 786 PetscInt i,j,n=A->rmap->n,*ai=a->i,*aj=a->j; 787 PetscInt *ajtmp,nz,row; 788 PetscInt *diag = a->diag,nbdiag,*pj; 789 PetscScalar *rtmp,*pc,multiplier,d; 790 MatScalar *pv,*v; 791 PetscReal rs; 792 FactorShiftCtx sctx; 793 const MatScalar *aa=a->a,*vtmp; 794 795 PetscFunctionBegin; 796 if (A != B) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"input and output matrix must have same address"); 797 798 /* MatPivotSetUp(): initialize shift context sctx */ 799 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 800 801 if (info->shifttype == (PetscReal) MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 802 const PetscInt *ddiag = a->diag; 803 sctx.shift_top = info->zeropivot; 804 for (i=0; i<n; i++) { 805 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 806 d = (aa)[ddiag[i]]; 807 rs = -PetscAbsScalar(d) - PetscRealPart(d); 808 vtmp = aa+ai[i]; 809 nz = ai[i+1] - ai[i]; 810 for (j=0; j<nz; j++) 811 rs += PetscAbsScalar(vtmp[j]); 812 if (rs>sctx.shift_top) sctx.shift_top = rs; 813 } 814 sctx.shift_top *= 1.1; 815 sctx.nshift_max = 5; 816 sctx.shift_lo = 0.; 817 sctx.shift_hi = 1.; 818 } 819 820 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 821 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 822 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);CHKERRQ(ierr); 823 ierr = PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));CHKERRQ(ierr); 824 ics = ic; 825 826 #if defined(MV) 827 sctx.shift_top = 0.; 828 sctx.nshift_max = 0; 829 sctx.shift_lo = 0.; 830 sctx.shift_hi = 0.; 831 sctx.shift_fraction = 0.; 832 833 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 834 sctx.shift_top = 0.; 835 for (i=0; i<n; i++) { 836 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 837 d = (a->a)[diag[i]]; 838 rs = -PetscAbsScalar(d) - PetscRealPart(d); 839 v = a->a+ai[i]; 840 nz = ai[i+1] - ai[i]; 841 for (j=0; j<nz; j++) 842 rs += PetscAbsScalar(v[j]); 843 if (rs>sctx.shift_top) sctx.shift_top = rs; 844 } 845 if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot; 846 sctx.shift_top *= 1.1; 847 sctx.nshift_max = 5; 848 sctx.shift_lo = 0.; 849 sctx.shift_hi = 1.; 850 } 851 852 sctx.shift_amount = 0.; 853 sctx.nshift = 0; 854 #endif 855 856 do { 857 sctx.newshift = PETSC_FALSE; 858 for (i=0; i<n; i++){ 859 /* load in initial unfactored row */ 860 nz = ai[r[i]+1] - ai[r[i]]; 861 ajtmp = aj + ai[r[i]]; 862 v = a->a + ai[r[i]]; 863 /* sort permuted ajtmp and values v accordingly */ 864 for (j=0; j<nz; j++) ajtmp[j] = ics[ajtmp[j]]; 865 ierr = PetscSortIntWithScalarArray(nz,ajtmp,v);CHKERRQ(ierr); 866 867 diag[r[i]] = ai[r[i]]; 868 for (j=0; j<nz; j++) { 869 rtmp[ajtmp[j]] = v[j]; 870 if (ajtmp[j] < i) diag[r[i]]++; /* update a->diag */ 871 } 872 rtmp[r[i]] += sctx.shift_amount; /* shift the diagonal of the matrix */ 873 874 row = *ajtmp++; 875 while (row < i) { 876 pc = rtmp + row; 877 if (*pc != 0.0) { 878 pv = a->a + diag[r[row]]; 879 pj = aj + diag[r[row]] + 1; 880 881 multiplier = *pc / *pv++; 882 *pc = multiplier; 883 nz = ai[r[row]+1] - diag[r[row]] - 1; 884 for (j=0; j<nz; j++) rtmp[pj[j]] -= multiplier * pv[j]; 885 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 886 } 887 row = *ajtmp++; 888 } 889 /* finished row so overwrite it onto a->a */ 890 pv = a->a + ai[r[i]] ; 891 pj = aj + ai[r[i]] ; 892 nz = ai[r[i]+1] - ai[r[i]]; 893 nbdiag = diag[r[i]] - ai[r[i]]; /* num of entries before the diagonal */ 894 895 rs = 0.0; 896 for (j=0; j<nz; j++) { 897 pv[j] = rtmp[pj[j]]; 898 if (j != nbdiag) rs += PetscAbsScalar(pv[j]); 899 } 900 901 sctx.rs = rs; 902 sctx.pv = pv[nbdiag]; 903 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 904 if (sctx.newshift) break; 905 pv[nbdiag] = sctx.pv; 906 } 907 908 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE && !sctx.newshift && sctx.shift_fraction>0 && sctx.nshift<sctx.nshift_max) { 909 /* 910 * if no shift in this attempt & shifting & started shifting & can refine, 911 * then try lower shift 912 */ 913 sctx.shift_hi = sctx.shift_fraction; 914 sctx.shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.; 915 sctx.shift_amount = sctx.shift_fraction * sctx.shift_top; 916 sctx.newshift = PETSC_TRUE; 917 sctx.nshift++; 918 } 919 } while (sctx.newshift); 920 921 /* invert diagonal entries for simplier triangular solves */ 922 for (i=0; i<n; i++) { 923 a->a[diag[r[i]]] = 1.0/a->a[diag[r[i]]]; 924 } 925 926 ierr = PetscFree(rtmp);CHKERRQ(ierr); 927 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 928 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 929 A->ops->solve = MatSolve_SeqAIJ_InplaceWithPerm; 930 A->ops->solveadd = MatSolveAdd_SeqAIJ_inplace; 931 A->ops->solvetranspose = MatSolveTranspose_SeqAIJ_inplace; 932 A->ops->solvetransposeadd = MatSolveTransposeAdd_SeqAIJ_inplace; 933 A->assembled = PETSC_TRUE; 934 A->preallocated = PETSC_TRUE; 935 ierr = PetscLogFlops(A->cmap->n);CHKERRQ(ierr); 936 if (sctx.nshift){ 937 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 938 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 939 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 940 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 941 } 942 } 943 PetscFunctionReturn(0); 944 } 945 946 /* ----------------------------------------------------------- */ 947 #undef __FUNCT__ 948 #define __FUNCT__ "MatLUFactor_SeqAIJ" 949 PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,const MatFactorInfo *info) 950 { 951 PetscErrorCode ierr; 952 Mat C; 953 954 PetscFunctionBegin; 955 ierr = MatGetFactor(A,MATSOLVERPETSC,MAT_FACTOR_LU,&C);CHKERRQ(ierr); 956 ierr = MatLUFactorSymbolic(C,A,row,col,info);CHKERRQ(ierr); 957 ierr = MatLUFactorNumeric(C,A,info);CHKERRQ(ierr); 958 A->ops->solve = C->ops->solve; 959 A->ops->solvetranspose = C->ops->solvetranspose; 960 ierr = MatHeaderMerge(A,C);CHKERRQ(ierr); 961 ierr = PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);CHKERRQ(ierr); 962 PetscFunctionReturn(0); 963 } 964 /* ----------------------------------------------------------- */ 965 966 967 #undef __FUNCT__ 968 #define __FUNCT__ "MatSolve_SeqAIJ_inplace" 969 PetscErrorCode MatSolve_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 970 { 971 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 972 IS iscol = a->col,isrow = a->row; 973 PetscErrorCode ierr; 974 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 975 PetscInt nz; 976 const PetscInt *rout,*cout,*r,*c; 977 PetscScalar *x,*tmp,*tmps,sum; 978 const PetscScalar *b; 979 const MatScalar *aa = a->a,*v; 980 981 PetscFunctionBegin; 982 if (!n) PetscFunctionReturn(0); 983 984 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 985 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 986 tmp = a->solve_work; 987 988 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 989 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 990 991 /* forward solve the lower triangular */ 992 tmp[0] = b[*r++]; 993 tmps = tmp; 994 for (i=1; i<n; i++) { 995 v = aa + ai[i] ; 996 vi = aj + ai[i] ; 997 nz = a->diag[i] - ai[i]; 998 sum = b[*r++]; 999 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1000 tmp[i] = sum; 1001 } 1002 1003 /* backward solve the upper triangular */ 1004 for (i=n-1; i>=0; i--){ 1005 v = aa + a->diag[i] + 1; 1006 vi = aj + a->diag[i] + 1; 1007 nz = ai[i+1] - a->diag[i] - 1; 1008 sum = tmp[i]; 1009 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1010 x[*c--] = tmp[i] = sum*aa[a->diag[i]]; 1011 } 1012 1013 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1014 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1015 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1016 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1017 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1018 PetscFunctionReturn(0); 1019 } 1020 1021 #undef __FUNCT__ 1022 #define __FUNCT__ "MatMatSolve_SeqAIJ_inplace" 1023 PetscErrorCode MatMatSolve_SeqAIJ_inplace(Mat A,Mat B,Mat X) 1024 { 1025 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1026 IS iscol = a->col,isrow = a->row; 1027 PetscErrorCode ierr; 1028 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1029 PetscInt nz,neq; 1030 const PetscInt *rout,*cout,*r,*c; 1031 PetscScalar *x,*b,*tmp,*tmps,sum; 1032 const MatScalar *aa = a->a,*v; 1033 PetscBool bisdense,xisdense; 1034 1035 PetscFunctionBegin; 1036 if (!n) PetscFunctionReturn(0); 1037 1038 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1039 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1040 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1041 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1042 1043 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1044 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1045 1046 tmp = a->solve_work; 1047 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1048 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1049 1050 for (neq=0; neq<B->cmap->n; neq++){ 1051 /* forward solve the lower triangular */ 1052 tmp[0] = b[r[0]]; 1053 tmps = tmp; 1054 for (i=1; i<n; i++) { 1055 v = aa + ai[i] ; 1056 vi = aj + ai[i] ; 1057 nz = a->diag[i] - ai[i]; 1058 sum = b[r[i]]; 1059 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1060 tmp[i] = sum; 1061 } 1062 /* backward solve the upper triangular */ 1063 for (i=n-1; i>=0; i--){ 1064 v = aa + a->diag[i] + 1; 1065 vi = aj + a->diag[i] + 1; 1066 nz = ai[i+1] - a->diag[i] - 1; 1067 sum = tmp[i]; 1068 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1069 x[c[i]] = tmp[i] = sum*aa[a->diag[i]]; 1070 } 1071 1072 b += n; 1073 x += n; 1074 } 1075 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1076 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1077 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1078 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1079 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMatSolve_SeqAIJ" 1085 PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X) 1086 { 1087 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1088 IS iscol = a->col,isrow = a->row; 1089 PetscErrorCode ierr; 1090 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1091 PetscInt nz,neq; 1092 const PetscInt *rout,*cout,*r,*c; 1093 PetscScalar *x,*b,*tmp,sum; 1094 const MatScalar *aa = a->a,*v; 1095 PetscBool bisdense,xisdense; 1096 1097 PetscFunctionBegin; 1098 if (!n) PetscFunctionReturn(0); 1099 1100 ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQDENSE,&bisdense);CHKERRQ(ierr); 1101 if (!bisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"B matrix must be a SeqDense matrix"); 1102 ierr = PetscObjectTypeCompare((PetscObject)X,MATSEQDENSE,&xisdense);CHKERRQ(ierr); 1103 if (!xisdense) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"X matrix must be a SeqDense matrix"); 1104 1105 ierr = MatGetArray(B,&b);CHKERRQ(ierr); 1106 ierr = MatGetArray(X,&x);CHKERRQ(ierr); 1107 1108 tmp = a->solve_work; 1109 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1110 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1111 1112 for (neq=0; neq<B->cmap->n; neq++){ 1113 /* forward solve the lower triangular */ 1114 tmp[0] = b[r[0]]; 1115 v = aa; 1116 vi = aj; 1117 for (i=1; i<n; i++) { 1118 nz = ai[i+1] - ai[i]; 1119 sum = b[r[i]]; 1120 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1121 tmp[i] = sum; 1122 v += nz; vi += nz; 1123 } 1124 1125 /* backward solve the upper triangular */ 1126 for (i=n-1; i>=0; i--){ 1127 v = aa + adiag[i+1]+1; 1128 vi = aj + adiag[i+1]+1; 1129 nz = adiag[i]-adiag[i+1]-1; 1130 sum = tmp[i]; 1131 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 1132 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 1133 } 1134 1135 b += n; 1136 x += n; 1137 } 1138 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1139 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1140 ierr = MatRestoreArray(B,&b);CHKERRQ(ierr); 1141 ierr = MatRestoreArray(X,&x);CHKERRQ(ierr); 1142 ierr = PetscLogFlops(B->cmap->n*(2.0*a->nz - n));CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 #undef __FUNCT__ 1147 #define __FUNCT__ "MatSolve_SeqAIJ_InplaceWithPerm" 1148 PetscErrorCode MatSolve_SeqAIJ_InplaceWithPerm(Mat A,Vec bb,Vec xx) 1149 { 1150 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1151 IS iscol = a->col,isrow = a->row; 1152 PetscErrorCode ierr; 1153 const PetscInt *r,*c,*rout,*cout; 1154 PetscInt i, n = A->rmap->n,*vi,*ai = a->i,*aj = a->j; 1155 PetscInt nz,row; 1156 PetscScalar *x,*b,*tmp,*tmps,sum; 1157 const MatScalar *aa = a->a,*v; 1158 1159 PetscFunctionBegin; 1160 if (!n) PetscFunctionReturn(0); 1161 1162 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 1163 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1164 tmp = a->solve_work; 1165 1166 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1167 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1168 1169 /* forward solve the lower triangular */ 1170 tmp[0] = b[*r++]; 1171 tmps = tmp; 1172 for (row=1; row<n; row++) { 1173 i = rout[row]; /* permuted row */ 1174 v = aa + ai[i] ; 1175 vi = aj + ai[i] ; 1176 nz = a->diag[i] - ai[i]; 1177 sum = b[*r++]; 1178 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1179 tmp[row] = sum; 1180 } 1181 1182 /* backward solve the upper triangular */ 1183 for (row=n-1; row>=0; row--){ 1184 i = rout[row]; /* permuted row */ 1185 v = aa + a->diag[i] + 1; 1186 vi = aj + a->diag[i] + 1; 1187 nz = ai[i+1] - a->diag[i] - 1; 1188 sum = tmp[row]; 1189 PetscSparseDenseMinusDot(sum,tmps,v,vi,nz); 1190 x[*c--] = tmp[row] = sum*aa[a->diag[i]]; 1191 } 1192 1193 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1194 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1195 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 1196 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1197 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 /* ----------------------------------------------------------- */ 1202 #include <../src/mat/impls/aij/seq/ftn-kernels/fsolve.h> 1203 #undef __FUNCT__ 1204 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering_inplace" 1205 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx) 1206 { 1207 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1208 PetscErrorCode ierr; 1209 PetscInt n = A->rmap->n; 1210 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag; 1211 PetscScalar *x; 1212 const PetscScalar *b; 1213 const MatScalar *aa = a->a; 1214 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1215 PetscInt adiag_i,i,nz,ai_i; 1216 const PetscInt *vi; 1217 const MatScalar *v; 1218 PetscScalar sum; 1219 #endif 1220 1221 PetscFunctionBegin; 1222 if (!n) PetscFunctionReturn(0); 1223 1224 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1225 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1226 1227 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 1228 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 1229 #else 1230 /* forward solve the lower triangular */ 1231 x[0] = b[0]; 1232 for (i=1; i<n; i++) { 1233 ai_i = ai[i]; 1234 v = aa + ai_i; 1235 vi = aj + ai_i; 1236 nz = adiag[i] - ai_i; 1237 sum = b[i]; 1238 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1239 x[i] = sum; 1240 } 1241 1242 /* backward solve the upper triangular */ 1243 for (i=n-1; i>=0; i--){ 1244 adiag_i = adiag[i]; 1245 v = aa + adiag_i + 1; 1246 vi = aj + adiag_i + 1; 1247 nz = ai[i+1] - adiag_i - 1; 1248 sum = x[i]; 1249 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 1250 x[i] = sum*aa[adiag_i]; 1251 } 1252 #endif 1253 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 1254 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1255 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1256 PetscFunctionReturn(0); 1257 } 1258 1259 #undef __FUNCT__ 1260 #define __FUNCT__ "MatSolveAdd_SeqAIJ_inplace" 1261 PetscErrorCode MatSolveAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec yy,Vec xx) 1262 { 1263 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1264 IS iscol = a->col,isrow = a->row; 1265 PetscErrorCode ierr; 1266 PetscInt i, n = A->rmap->n,j; 1267 PetscInt nz; 1268 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j; 1269 PetscScalar *x,*tmp,sum; 1270 const PetscScalar *b; 1271 const MatScalar *aa = a->a,*v; 1272 1273 PetscFunctionBegin; 1274 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1275 1276 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1277 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1278 tmp = a->solve_work; 1279 1280 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1281 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 1282 1283 /* forward solve the lower triangular */ 1284 tmp[0] = b[*r++]; 1285 for (i=1; i<n; i++) { 1286 v = aa + ai[i] ; 1287 vi = aj + ai[i] ; 1288 nz = a->diag[i] - ai[i]; 1289 sum = b[*r++]; 1290 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1291 tmp[i] = sum; 1292 } 1293 1294 /* backward solve the upper triangular */ 1295 for (i=n-1; i>=0; i--){ 1296 v = aa + a->diag[i] + 1; 1297 vi = aj + a->diag[i] + 1; 1298 nz = ai[i+1] - a->diag[i] - 1; 1299 sum = tmp[i]; 1300 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1301 tmp[i] = sum*aa[a->diag[i]]; 1302 x[*c--] += tmp[i]; 1303 } 1304 1305 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1306 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1307 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1308 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1309 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1310 1311 PetscFunctionReturn(0); 1312 } 1313 1314 #undef __FUNCT__ 1315 #define __FUNCT__ "MatSolveAdd_SeqAIJ" 1316 PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx) 1317 { 1318 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1319 IS iscol = a->col,isrow = a->row; 1320 PetscErrorCode ierr; 1321 PetscInt i, n = A->rmap->n,j; 1322 PetscInt nz; 1323 const PetscInt *rout,*cout,*r,*c,*vi,*ai = a->i,*aj = a->j,*adiag = a->diag; 1324 PetscScalar *x,*tmp,sum; 1325 const PetscScalar *b; 1326 const MatScalar *aa = a->a,*v; 1327 1328 PetscFunctionBegin; 1329 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 1330 1331 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1332 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1333 tmp = a->solve_work; 1334 1335 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1336 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1337 1338 /* forward solve the lower triangular */ 1339 tmp[0] = b[r[0]]; 1340 v = aa; 1341 vi = aj; 1342 for (i=1; i<n; i++) { 1343 nz = ai[i+1] - ai[i]; 1344 sum = b[r[i]]; 1345 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1346 tmp[i] = sum; 1347 v += nz; vi += nz; 1348 } 1349 1350 /* backward solve the upper triangular */ 1351 v = aa + adiag[n-1]; 1352 vi = aj + adiag[n-1]; 1353 for (i=n-1; i>=0; i--){ 1354 nz = adiag[i] - adiag[i+1] - 1; 1355 sum = tmp[i]; 1356 for (j=0; j<nz; j++) sum -= v[j]*tmp[vi[j]]; 1357 tmp[i] = sum*v[nz]; 1358 x[c[i]] += tmp[i]; 1359 v += nz+1; vi += nz+1; 1360 } 1361 1362 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1363 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1364 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1365 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1366 ierr = PetscLogFlops(2.0*a->nz);CHKERRQ(ierr); 1367 1368 PetscFunctionReturn(0); 1369 } 1370 1371 #undef __FUNCT__ 1372 #define __FUNCT__ "MatSolveTranspose_SeqAIJ_inplace" 1373 PetscErrorCode MatSolveTranspose_SeqAIJ_inplace(Mat A,Vec bb,Vec xx) 1374 { 1375 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1376 IS iscol = a->col,isrow = a->row; 1377 PetscErrorCode ierr; 1378 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1379 PetscInt i,n = A->rmap->n,j; 1380 PetscInt nz; 1381 PetscScalar *x,*tmp,s1; 1382 const MatScalar *aa = a->a,*v; 1383 const PetscScalar *b; 1384 1385 PetscFunctionBegin; 1386 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1387 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1388 tmp = a->solve_work; 1389 1390 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1391 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1392 1393 /* copy the b into temp work space according to permutation */ 1394 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1395 1396 /* forward solve the U^T */ 1397 for (i=0; i<n; i++) { 1398 v = aa + diag[i] ; 1399 vi = aj + diag[i] + 1; 1400 nz = ai[i+1] - diag[i] - 1; 1401 s1 = tmp[i]; 1402 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1403 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1404 tmp[i] = s1; 1405 } 1406 1407 /* backward solve the L^T */ 1408 for (i=n-1; i>=0; i--){ 1409 v = aa + diag[i] - 1 ; 1410 vi = aj + diag[i] - 1 ; 1411 nz = diag[i] - ai[i]; 1412 s1 = tmp[i]; 1413 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1414 } 1415 1416 /* copy tmp into x according to permutation */ 1417 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1418 1419 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1420 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1421 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1422 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1423 1424 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1425 PetscFunctionReturn(0); 1426 } 1427 1428 #undef __FUNCT__ 1429 #define __FUNCT__ "MatSolveTranspose_SeqAIJ" 1430 PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx) 1431 { 1432 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1433 IS iscol = a->col,isrow = a->row; 1434 PetscErrorCode ierr; 1435 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1436 PetscInt i,n = A->rmap->n,j; 1437 PetscInt nz; 1438 PetscScalar *x,*tmp,s1; 1439 const MatScalar *aa = a->a,*v; 1440 const PetscScalar *b; 1441 1442 PetscFunctionBegin; 1443 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1444 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1445 tmp = a->solve_work; 1446 1447 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1448 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1449 1450 /* copy the b into temp work space according to permutation */ 1451 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1452 1453 /* forward solve the U^T */ 1454 for (i=0; i<n; i++) { 1455 v = aa + adiag[i+1] + 1; 1456 vi = aj + adiag[i+1] + 1; 1457 nz = adiag[i] - adiag[i+1] - 1; 1458 s1 = tmp[i]; 1459 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1460 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1461 tmp[i] = s1; 1462 } 1463 1464 /* backward solve the L^T */ 1465 for (i=n-1; i>=0; i--){ 1466 v = aa + ai[i]; 1467 vi = aj + ai[i]; 1468 nz = ai[i+1] - ai[i]; 1469 s1 = tmp[i]; 1470 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1471 } 1472 1473 /* copy tmp into x according to permutation */ 1474 for (i=0; i<n; i++) x[r[i]] = tmp[i]; 1475 1476 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1477 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1478 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1479 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1480 1481 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1482 PetscFunctionReturn(0); 1483 } 1484 1485 #undef __FUNCT__ 1486 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ_inplace" 1487 PetscErrorCode MatSolveTransposeAdd_SeqAIJ_inplace(Mat A,Vec bb,Vec zz,Vec xx) 1488 { 1489 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1490 IS iscol = a->col,isrow = a->row; 1491 PetscErrorCode ierr; 1492 const PetscInt *rout,*cout,*r,*c,*diag = a->diag,*ai = a->i,*aj = a->j,*vi; 1493 PetscInt i,n = A->rmap->n,j; 1494 PetscInt nz; 1495 PetscScalar *x,*tmp,s1; 1496 const MatScalar *aa = a->a,*v; 1497 const PetscScalar *b; 1498 1499 PetscFunctionBegin; 1500 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1501 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1502 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1503 tmp = a->solve_work; 1504 1505 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1506 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1507 1508 /* copy the b into temp work space according to permutation */ 1509 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1510 1511 /* forward solve the U^T */ 1512 for (i=0; i<n; i++) { 1513 v = aa + diag[i] ; 1514 vi = aj + diag[i] + 1; 1515 nz = ai[i+1] - diag[i] - 1; 1516 s1 = tmp[i]; 1517 s1 *= (*v++); /* multiply by inverse of diagonal entry */ 1518 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1519 tmp[i] = s1; 1520 } 1521 1522 /* backward solve the L^T */ 1523 for (i=n-1; i>=0; i--){ 1524 v = aa + diag[i] - 1 ; 1525 vi = aj + diag[i] - 1 ; 1526 nz = diag[i] - ai[i]; 1527 s1 = tmp[i]; 1528 for (j=0; j>-nz; j--) tmp[vi[j]] -= s1*v[j]; 1529 } 1530 1531 /* copy tmp into x according to permutation */ 1532 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1533 1534 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1535 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1537 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1538 1539 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1540 PetscFunctionReturn(0); 1541 } 1542 1543 #undef __FUNCT__ 1544 #define __FUNCT__ "MatSolveTransposeAdd_SeqAIJ" 1545 PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx) 1546 { 1547 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 1548 IS iscol = a->col,isrow = a->row; 1549 PetscErrorCode ierr; 1550 const PetscInt *rout,*cout,*r,*c,*adiag = a->diag,*ai = a->i,*aj = a->j,*vi; 1551 PetscInt i,n = A->rmap->n,j; 1552 PetscInt nz; 1553 PetscScalar *x,*tmp,s1; 1554 const MatScalar *aa = a->a,*v; 1555 const PetscScalar *b; 1556 1557 PetscFunctionBegin; 1558 if (zz != xx) {ierr = VecCopy(zz,xx);CHKERRQ(ierr);} 1559 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 1560 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1561 tmp = a->solve_work; 1562 1563 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 1564 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 1565 1566 /* copy the b into temp work space according to permutation */ 1567 for (i=0; i<n; i++) tmp[i] = b[c[i]]; 1568 1569 /* forward solve the U^T */ 1570 for (i=0; i<n; i++) { 1571 v = aa + adiag[i+1] + 1; 1572 vi = aj + adiag[i+1] + 1; 1573 nz = adiag[i] - adiag[i+1] - 1; 1574 s1 = tmp[i]; 1575 s1 *= v[nz]; /* multiply by inverse of diagonal entry */ 1576 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1577 tmp[i] = s1; 1578 } 1579 1580 1581 /* backward solve the L^T */ 1582 for (i=n-1; i>=0; i--){ 1583 v = aa + ai[i] ; 1584 vi = aj + ai[i]; 1585 nz = ai[i+1] - ai[i]; 1586 s1 = tmp[i]; 1587 for (j=0; j<nz; j++) tmp[vi[j]] -= s1*v[j]; 1588 } 1589 1590 /* copy tmp into x according to permutation */ 1591 for (i=0; i<n; i++) x[r[i]] += tmp[i]; 1592 1593 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 1594 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 1595 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1596 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1597 1598 ierr = PetscLogFlops(2.0*a->nz-A->cmap->n);CHKERRQ(ierr); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 /* ----------------------------------------------------------------*/ 1603 1604 extern PetscErrorCode MatDuplicateNoCreate_SeqAIJ(Mat,Mat,MatDuplicateOption,PetscBool ); 1605 1606 /* 1607 ilu() under revised new data structure. 1608 Factored arrays bj and ba are stored as 1609 L(0,:), L(1,:), ...,L(n-1,:), U(n-1,:),...,U(i,:),U(i-1,:),...,U(0,:) 1610 1611 bi=fact->i is an array of size n+1, in which 1612 bi+ 1613 bi[i]: points to 1st entry of L(i,:),i=0,...,n-1 1614 bi[n]: points to L(n-1,n-1)+1 1615 1616 bdiag=fact->diag is an array of size n+1,in which 1617 bdiag[i]: points to diagonal of U(i,:), i=0,...,n-1 1618 bdiag[n]: points to entry of U(n-1,0)-1 1619 1620 U(i,:) contains bdiag[i] as its last entry, i.e., 1621 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 1622 */ 1623 #undef __FUNCT__ 1624 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_ilu0" 1625 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_ilu0(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1626 { 1627 1628 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1629 PetscErrorCode ierr; 1630 const PetscInt n=A->rmap->n,*ai=a->i,*aj,*adiag=a->diag; 1631 PetscInt i,j,k=0,nz,*bi,*bj,*bdiag; 1632 PetscBool missing; 1633 IS isicol; 1634 1635 PetscFunctionBegin; 1636 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1637 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 1638 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 1639 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1640 1641 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_FALSE);CHKERRQ(ierr); 1642 b = (Mat_SeqAIJ*)(fact)->data; 1643 1644 /* allocate matrix arrays for new data structure */ 1645 ierr = PetscMalloc3(ai[n]+1,PetscScalar,&b->a,ai[n]+1,PetscInt,&b->j,n+1,PetscInt,&b->i);CHKERRQ(ierr); 1646 ierr = PetscLogObjectMemory(fact,ai[n]*(sizeof(PetscScalar)+sizeof(PetscInt))+(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1647 b->singlemalloc = PETSC_TRUE; 1648 if (!b->diag){ 1649 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&b->diag);CHKERRQ(ierr); 1650 ierr = PetscLogObjectMemory(fact,(n+1)*sizeof(PetscInt));CHKERRQ(ierr); 1651 } 1652 bdiag = b->diag; 1653 1654 if (n > 0) { 1655 ierr = PetscMemzero(b->a,(ai[n])*sizeof(MatScalar));CHKERRQ(ierr); 1656 } 1657 1658 /* set bi and bj with new data structure */ 1659 bi = b->i; 1660 bj = b->j; 1661 1662 /* L part */ 1663 bi[0] = 0; 1664 for (i=0; i<n; i++){ 1665 nz = adiag[i] - ai[i]; 1666 bi[i+1] = bi[i] + nz; 1667 aj = a->j + ai[i]; 1668 for (j=0; j<nz; j++){ 1669 /* *bj = aj[j]; bj++; */ 1670 bj[k++] = aj[j]; 1671 } 1672 } 1673 1674 /* U part */ 1675 bdiag[n] = bi[n]-1; 1676 for (i=n-1; i>=0; i--){ 1677 nz = ai[i+1] - adiag[i] - 1; 1678 aj = a->j + adiag[i] + 1; 1679 for (j=0; j<nz; j++){ 1680 /* *bj = aj[j]; bj++; */ 1681 bj[k++] = aj[j]; 1682 } 1683 /* diag[i] */ 1684 /* *bj = i; bj++; */ 1685 bj[k++] = i; 1686 bdiag[i] = bdiag[i+1] + nz + 1; 1687 } 1688 1689 fact->factortype = MAT_FACTOR_ILU; 1690 fact->info.factor_mallocs = 0; 1691 fact->info.fill_ratio_given = info->fill; 1692 fact->info.fill_ratio_needed = 1.0; 1693 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1694 1695 b = (Mat_SeqAIJ*)(fact)->data; 1696 b->row = isrow; 1697 b->col = iscol; 1698 b->icol = isicol; 1699 ierr = PetscMalloc((fact->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1700 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1701 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1702 PetscFunctionReturn(0); 1703 } 1704 1705 #undef __FUNCT__ 1706 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ" 1707 PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1708 { 1709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1710 IS isicol; 1711 PetscErrorCode ierr; 1712 const PetscInt *r,*ic; 1713 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j; 1714 PetscInt *bi,*cols,nnz,*cols_lvl; 1715 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1716 PetscInt i,levels,diagonal_fill; 1717 PetscBool col_identity,row_identity; 1718 PetscReal f; 1719 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1720 PetscBT lnkbt; 1721 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1722 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1723 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1724 1725 PetscFunctionBegin; 1726 /* Uncomment the old data struct part only while testing new data structure for MatSolve() */ 1727 /* 1728 PetscBool olddatastruct=PETSC_FALSE; 1729 ierr = PetscOptionsGetBool(PETSC_NULL,"-ilu_old",&olddatastruct,PETSC_NULL);CHKERRQ(ierr); 1730 if(olddatastruct){ 1731 ierr = MatILUFactorSymbolic_SeqAIJ_inplace(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1732 PetscFunctionReturn(0); 1733 } 1734 */ 1735 1736 levels = (PetscInt)info->levels; 1737 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1738 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1739 1740 if (!levels && row_identity && col_identity) { 1741 /* special case: ilu(0) with natural ordering */ 1742 ierr = MatILUFactorSymbolic_SeqAIJ_ilu0(fact,A,isrow,iscol,info);CHKERRQ(ierr); 1743 if (a->inode.size) { 1744 fact->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1745 } 1746 PetscFunctionReturn(0); 1747 } 1748 1749 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1750 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1751 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1752 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1753 1754 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1755 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1756 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1757 bi[0] = bdiag[0] = 0; 1758 1759 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1760 1761 /* create a linked list for storing column indices of the active row */ 1762 nlnk = n + 1; 1763 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1764 1765 /* initial FreeSpace size is f*(ai[n]+1) */ 1766 f = info->fill; 1767 diagonal_fill = (PetscInt)info->diagonal_fill; 1768 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1769 current_space = free_space; 1770 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1771 current_space_lvl = free_space_lvl; 1772 1773 for (i=0; i<n; i++) { 1774 nzi = 0; 1775 /* copy current row into linked list */ 1776 nnz = ai[r[i]+1] - ai[r[i]]; 1777 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1778 cols = aj + ai[r[i]]; 1779 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1780 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1781 nzi += nlnk; 1782 1783 /* make sure diagonal entry is included */ 1784 if (diagonal_fill && lnk[i] == -1) { 1785 fm = n; 1786 while (lnk[fm] < i) fm = lnk[fm]; 1787 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1788 lnk[fm] = i; 1789 lnk_lvl[i] = 0; 1790 nzi++; dcount++; 1791 } 1792 1793 /* add pivot rows into the active row */ 1794 nzbd = 0; 1795 prow = lnk[n]; 1796 while (prow < i) { 1797 nnz = bdiag[prow]; 1798 cols = bj_ptr[prow] + nnz + 1; 1799 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1800 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1801 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1802 nzi += nlnk; 1803 prow = lnk[prow]; 1804 nzbd++; 1805 } 1806 bdiag[i] = nzbd; 1807 bi[i+1] = bi[i] + nzi; 1808 1809 /* if free space is not available, make more free space */ 1810 if (current_space->local_remaining<nzi) { 1811 nnz = 2*nzi*(n - i); /* estimated and max additional space needed */ 1812 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 1813 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 1814 reallocs++; 1815 } 1816 1817 /* copy data into free_space and free_space_lvl, then initialize lnk */ 1818 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 1819 bj_ptr[i] = current_space->array; 1820 bjlvl_ptr[i] = current_space_lvl->array; 1821 1822 /* make sure the active row i has diagonal entry */ 1823 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 1824 1825 current_space->array += nzi; 1826 current_space->local_used += nzi; 1827 current_space->local_remaining -= nzi; 1828 current_space_lvl->array += nzi; 1829 current_space_lvl->local_used += nzi; 1830 current_space_lvl->local_remaining -= nzi; 1831 } 1832 1833 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 1834 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 1835 1836 /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 1837 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 1838 ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr); 1839 1840 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 1841 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 1842 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 1843 1844 #if defined(PETSC_USE_INFO) 1845 { 1846 PetscReal af = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1847 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 1848 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 1849 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 1850 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 1851 if (diagonal_fill) { 1852 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 1853 } 1854 } 1855 #endif 1856 1857 /* put together the new matrix */ 1858 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 1859 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 1860 b = (Mat_SeqAIJ*)(fact)->data; 1861 b->free_a = PETSC_TRUE; 1862 b->free_ij = PETSC_TRUE; 1863 b->singlemalloc = PETSC_FALSE; 1864 ierr = PetscMalloc((bdiag[0]+1)*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 1865 b->j = bj; 1866 b->i = bi; 1867 b->diag = bdiag; 1868 b->ilen = 0; 1869 b->imax = 0; 1870 b->row = isrow; 1871 b->col = iscol; 1872 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1873 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1874 b->icol = isicol; 1875 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1876 /* In b structure: Free imax, ilen, old a, old j. 1877 Allocate bdiag, solve_work, new a, new j */ 1878 ierr = PetscLogObjectMemory(fact,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 1879 b->maxnz = b->nz = bdiag[0]+1; 1880 (fact)->info.factor_mallocs = reallocs; 1881 (fact)->info.fill_ratio_given = f; 1882 (fact)->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 1883 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ; 1884 if (a->inode.size) { 1885 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode; 1886 } 1887 PetscFunctionReturn(0); 1888 } 1889 1890 #undef __FUNCT__ 1891 #define __FUNCT__ "MatILUFactorSymbolic_SeqAIJ_inplace" 1892 PetscErrorCode MatILUFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 1893 { 1894 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b; 1895 IS isicol; 1896 PetscErrorCode ierr; 1897 const PetscInt *r,*ic; 1898 PetscInt n=A->rmap->n,*ai=a->i,*aj=a->j,d; 1899 PetscInt *bi,*cols,nnz,*cols_lvl; 1900 PetscInt *bdiag,prow,fm,nzbd,reallocs=0,dcount=0; 1901 PetscInt i,levels,diagonal_fill; 1902 PetscBool col_identity,row_identity; 1903 PetscReal f; 1904 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL; 1905 PetscBT lnkbt; 1906 PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr; 1907 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 1908 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 1909 PetscBool missing; 1910 1911 PetscFunctionBegin; 1912 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 1913 f = info->fill; 1914 levels = (PetscInt)info->levels; 1915 diagonal_fill = (PetscInt)info->diagonal_fill; 1916 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 1917 1918 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 1919 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 1920 if (!levels && row_identity && col_identity) { /* special case: ilu(0) with natural ordering */ 1921 ierr = MatDuplicateNoCreate_SeqAIJ(fact,A,MAT_DO_NOT_COPY_VALUES,PETSC_TRUE);CHKERRQ(ierr); 1922 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 1923 if (a->inode.size) { 1924 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 1925 } 1926 fact->factortype = MAT_FACTOR_ILU; 1927 (fact)->info.factor_mallocs = 0; 1928 (fact)->info.fill_ratio_given = info->fill; 1929 (fact)->info.fill_ratio_needed = 1.0; 1930 b = (Mat_SeqAIJ*)(fact)->data; 1931 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 1932 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 1933 b->row = isrow; 1934 b->col = iscol; 1935 b->icol = isicol; 1936 ierr = PetscMalloc(((fact)->rmap->n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 1937 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 1938 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 1939 PetscFunctionReturn(0); 1940 } 1941 1942 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 1943 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 1944 1945 /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1946 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 1947 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); 1948 bi[0] = bdiag[0] = 0; 1949 1950 ierr = PetscMalloc2(n,PetscInt*,&bj_ptr,n,PetscInt*,&bjlvl_ptr);CHKERRQ(ierr); 1951 1952 /* create a linked list for storing column indices of the active row */ 1953 nlnk = n + 1; 1954 ierr = PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1955 1956 /* initial FreeSpace size is f*(ai[n]+1) */ 1957 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr); 1958 current_space = free_space; 1959 ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);CHKERRQ(ierr); 1960 current_space_lvl = free_space_lvl; 1961 1962 for (i=0; i<n; i++) { 1963 nzi = 0; 1964 /* copy current row into linked list */ 1965 nnz = ai[r[i]+1] - ai[r[i]]; 1966 if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 1967 cols = aj + ai[r[i]]; 1968 lnk[i] = -1; /* marker to indicate if diagonal exists */ 1969 ierr = PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 1970 nzi += nlnk; 1971 1972 /* make sure diagonal entry is included */ 1973 if (diagonal_fill && lnk[i] == -1) { 1974 fm = n; 1975 while (lnk[fm] < i) fm = lnk[fm]; 1976 lnk[i] = lnk[fm]; /* insert diagonal into linked list */ 1977 lnk[fm] = i; 1978 lnk_lvl[i] = 0; 1979 nzi++; dcount++; 1980 } 1981 1982 /* add pivot rows into the active row */ 1983 nzbd = 0; 1984 prow = lnk[n]; 1985 while (prow < i) { 1986 nnz = bdiag[prow]; 1987 cols = bj_ptr[prow] + nnz + 1; 1988 cols_lvl = bjlvl_ptr[prow] + nnz + 1; 1989 nnz = bi[prow+1] - bi[prow] - nnz - 1; 1990 ierr = PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);CHKERRQ(ierr); 1991 nzi += nlnk; 1992 prow = lnk[prow]; 1993 nzbd++; 1994 } 1995 bdiag[i] = nzbd; 1996 bi[i+1] = bi[i] + nzi; 1997 1998 /* if free space is not available, make more free space */ 1999 if (current_space->local_remaining<nzi) { 2000 nnz = nzi*(n - i); /* estimated and max additional space needed */ 2001 ierr = PetscFreeSpaceGet(nnz,¤t_space);CHKERRQ(ierr); 2002 ierr = PetscFreeSpaceGet(nnz,¤t_space_lvl);CHKERRQ(ierr); 2003 reallocs++; 2004 } 2005 2006 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2007 ierr = PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2008 bj_ptr[i] = current_space->array; 2009 bjlvl_ptr[i] = current_space_lvl->array; 2010 2011 /* make sure the active row i has diagonal entry */ 2012 if (*(bj_ptr[i]+bdiag[i]) != i) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\ntry running with -pc_factor_nonzeros_along_diagonal or -pc_factor_diagonal_fill",i); 2013 2014 current_space->array += nzi; 2015 current_space->local_used += nzi; 2016 current_space->local_remaining -= nzi; 2017 current_space_lvl->array += nzi; 2018 current_space_lvl->local_used += nzi; 2019 current_space_lvl->local_remaining -= nzi; 2020 } 2021 2022 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 2023 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 2024 2025 /* destroy list of free space and other temporary arrays */ 2026 ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 2027 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); /* copy free_space -> bj */ 2028 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2029 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2030 ierr = PetscFree2(bj_ptr,bjlvl_ptr);CHKERRQ(ierr); 2031 2032 #if defined(PETSC_USE_INFO) 2033 { 2034 PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2035 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr); 2036 ierr = PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2037 ierr = PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);CHKERRQ(ierr); 2038 ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr); 2039 if (diagonal_fill) { 2040 ierr = PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);CHKERRQ(ierr); 2041 } 2042 } 2043 #endif 2044 2045 /* put together the new matrix */ 2046 ierr = MatSeqAIJSetPreallocation_SeqAIJ(fact,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 2047 ierr = PetscLogObjectParent(fact,isicol);CHKERRQ(ierr); 2048 b = (Mat_SeqAIJ*)(fact)->data; 2049 b->free_a = PETSC_TRUE; 2050 b->free_ij = PETSC_TRUE; 2051 b->singlemalloc = PETSC_FALSE; 2052 ierr = PetscMalloc(bi[n]*sizeof(PetscScalar),&b->a);CHKERRQ(ierr); 2053 b->j = bj; 2054 b->i = bi; 2055 for (i=0; i<n; i++) bdiag[i] += bi[i]; 2056 b->diag = bdiag; 2057 b->ilen = 0; 2058 b->imax = 0; 2059 b->row = isrow; 2060 b->col = iscol; 2061 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 2062 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 2063 b->icol = isicol; 2064 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2065 /* In b structure: Free imax, ilen, old a, old j. 2066 Allocate bdiag, solve_work, new a, new j */ 2067 ierr = PetscLogObjectMemory(fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));CHKERRQ(ierr); 2068 b->maxnz = b->nz = bi[n] ; 2069 (fact)->info.factor_mallocs = reallocs; 2070 (fact)->info.fill_ratio_given = f; 2071 (fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 2072 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_inplace; 2073 if (a->inode.size) { 2074 (fact)->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_Inode_inplace; 2075 } 2076 PetscFunctionReturn(0); 2077 } 2078 2079 #undef __FUNCT__ 2080 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ" 2081 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat B,Mat A,const MatFactorInfo *info) 2082 { 2083 Mat C = B; 2084 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2085 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2086 IS ip=b->row,iip = b->icol; 2087 PetscErrorCode ierr; 2088 const PetscInt *rip,*riip; 2089 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp; 2090 PetscInt *ai=a->i,*aj=a->j; 2091 PetscInt k,jmin,jmax,*c2r,*il,col,nexti,ili,nz; 2092 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2093 PetscBool perm_identity; 2094 FactorShiftCtx sctx; 2095 PetscReal rs; 2096 MatScalar d,*v; 2097 2098 PetscFunctionBegin; 2099 /* MatPivotSetUp(): initialize shift context sctx */ 2100 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2101 2102 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2103 sctx.shift_top = info->zeropivot; 2104 for (i=0; i<mbs; i++) { 2105 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2106 d = (aa)[a->diag[i]]; 2107 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2108 v = aa+ai[i]; 2109 nz = ai[i+1] - ai[i]; 2110 for (j=0; j<nz; j++) 2111 rs += PetscAbsScalar(v[j]); 2112 if (rs>sctx.shift_top) sctx.shift_top = rs; 2113 } 2114 sctx.shift_top *= 1.1; 2115 sctx.nshift_max = 5; 2116 sctx.shift_lo = 0.; 2117 sctx.shift_hi = 1.; 2118 } 2119 2120 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2121 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2122 2123 /* allocate working arrays 2124 c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col 2125 il: for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays 2126 */ 2127 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&c2r);CHKERRQ(ierr); 2128 2129 do { 2130 sctx.newshift = PETSC_FALSE; 2131 2132 for (i=0; i<mbs; i++) c2r[i] = mbs; 2133 il[0] = 0; 2134 2135 for (k = 0; k<mbs; k++){ 2136 /* zero rtmp */ 2137 nz = bi[k+1] - bi[k]; 2138 bjtmp = bj + bi[k]; 2139 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2140 2141 /* load in initial unfactored row */ 2142 bval = ba + bi[k]; 2143 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2144 for (j = jmin; j < jmax; j++){ 2145 col = riip[aj[j]]; 2146 if (col >= k){ /* only take upper triangular entry */ 2147 rtmp[col] = aa[j]; 2148 *bval++ = 0.0; /* for in-place factorization */ 2149 } 2150 } 2151 /* shift the diagonal of the matrix: ZeropivotApply() */ 2152 rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */ 2153 2154 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2155 dk = rtmp[k]; 2156 i = c2r[k]; /* first row to be added to k_th row */ 2157 2158 while (i < k){ 2159 nexti = c2r[i]; /* next row to be added to k_th row */ 2160 2161 /* compute multiplier, update diag(k) and U(i,k) */ 2162 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2163 uikdi = - ba[ili]*ba[bdiag[i]]; /* diagonal(k) */ 2164 dk += uikdi*ba[ili]; /* update diag[k] */ 2165 ba[ili] = uikdi; /* -U(i,k) */ 2166 2167 /* add multiple of row i to k-th row */ 2168 jmin = ili + 1; jmax = bi[i+1]; 2169 if (jmin < jmax){ 2170 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2171 /* update il and c2r for row i */ 2172 il[i] = jmin; 2173 j = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i; 2174 } 2175 i = nexti; 2176 } 2177 2178 /* copy data into U(k,:) */ 2179 rs = 0.0; 2180 jmin = bi[k]; jmax = bi[k+1]-1; 2181 if (jmin < jmax) { 2182 for (j=jmin; j<jmax; j++){ 2183 col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]); 2184 } 2185 /* add the k-th row into il and c2r */ 2186 il[k] = jmin; 2187 i = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k; 2188 } 2189 2190 /* MatPivotCheck() */ 2191 sctx.rs = rs; 2192 sctx.pv = dk; 2193 ierr = MatPivotCheck(A,info,&sctx,i);CHKERRQ(ierr); 2194 if(sctx.newshift) break; 2195 dk = sctx.pv; 2196 2197 ba[bdiag[k]] = 1.0/dk; /* U(k,k) */ 2198 } 2199 } while (sctx.newshift); 2200 2201 ierr = PetscFree3(rtmp,il,c2r);CHKERRQ(ierr); 2202 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2203 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2204 2205 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2206 if (perm_identity){ 2207 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2208 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering; 2209 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering; 2210 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering; 2211 } else { 2212 B->ops->solve = MatSolve_SeqSBAIJ_1; 2213 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1; 2214 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1; 2215 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1; 2216 } 2217 2218 C->assembled = PETSC_TRUE; 2219 C->preallocated = PETSC_TRUE; 2220 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2221 2222 /* MatPivotView() */ 2223 if (sctx.nshift){ 2224 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2225 ierr = PetscInfo4(A,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,sctx.shift_fraction,sctx.shift_top);CHKERRQ(ierr); 2226 } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2227 ierr = PetscInfo2(A,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2228 } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS){ 2229 ierr = PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %G\n",sctx.nshift,info->shiftamount);CHKERRQ(ierr); 2230 } 2231 } 2232 PetscFunctionReturn(0); 2233 } 2234 2235 #undef __FUNCT__ 2236 #define __FUNCT__ "MatCholeskyFactorNumeric_SeqAIJ_inplace" 2237 PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ_inplace(Mat B,Mat A,const MatFactorInfo *info) 2238 { 2239 Mat C = B; 2240 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 2241 Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data; 2242 IS ip=b->row,iip = b->icol; 2243 PetscErrorCode ierr; 2244 const PetscInt *rip,*riip; 2245 PetscInt i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bcol,*bjtmp; 2246 PetscInt *ai=a->i,*aj=a->j; 2247 PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz; 2248 MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi; 2249 PetscBool perm_identity; 2250 FactorShiftCtx sctx; 2251 PetscReal rs; 2252 MatScalar d,*v; 2253 2254 PetscFunctionBegin; 2255 /* MatPivotSetUp(): initialize shift context sctx */ 2256 ierr = PetscMemzero(&sctx,sizeof(FactorShiftCtx));CHKERRQ(ierr); 2257 2258 if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */ 2259 sctx.shift_top = info->zeropivot; 2260 for (i=0; i<mbs; i++) { 2261 /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */ 2262 d = (aa)[a->diag[i]]; 2263 rs = -PetscAbsScalar(d) - PetscRealPart(d); 2264 v = aa+ai[i]; 2265 nz = ai[i+1] - ai[i]; 2266 for (j=0; j<nz; j++) 2267 rs += PetscAbsScalar(v[j]); 2268 if (rs>sctx.shift_top) sctx.shift_top = rs; 2269 } 2270 sctx.shift_top *= 1.1; 2271 sctx.nshift_max = 5; 2272 sctx.shift_lo = 0.; 2273 sctx.shift_hi = 1.; 2274 } 2275 2276 ierr = ISGetIndices(ip,&rip);CHKERRQ(ierr); 2277 ierr = ISGetIndices(iip,&riip);CHKERRQ(ierr); 2278 2279 /* initialization */ 2280 ierr = PetscMalloc3(mbs,MatScalar,&rtmp,mbs,PetscInt,&il,mbs,PetscInt,&jl);CHKERRQ(ierr); 2281 2282 do { 2283 sctx.newshift = PETSC_FALSE; 2284 2285 for (i=0; i<mbs; i++) jl[i] = mbs; 2286 il[0] = 0; 2287 2288 for (k = 0; k<mbs; k++){ 2289 /* zero rtmp */ 2290 nz = bi[k+1] - bi[k]; 2291 bjtmp = bj + bi[k]; 2292 for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0; 2293 2294 bval = ba + bi[k]; 2295 /* initialize k-th row by the perm[k]-th row of A */ 2296 jmin = ai[rip[k]]; jmax = ai[rip[k]+1]; 2297 for (j = jmin; j < jmax; j++){ 2298 col = riip[aj[j]]; 2299 if (col >= k){ /* only take upper triangular entry */ 2300 rtmp[col] = aa[j]; 2301 *bval++ = 0.0; /* for in-place factorization */ 2302 } 2303 } 2304 /* shift the diagonal of the matrix */ 2305 if (sctx.nshift) rtmp[k] += sctx.shift_amount; 2306 2307 /* modify k-th row by adding in those rows i with U(i,k)!=0 */ 2308 dk = rtmp[k]; 2309 i = jl[k]; /* first row to be added to k_th row */ 2310 2311 while (i < k){ 2312 nexti = jl[i]; /* next row to be added to k_th row */ 2313 2314 /* compute multiplier, update diag(k) and U(i,k) */ 2315 ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */ 2316 uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */ 2317 dk += uikdi*ba[ili]; 2318 ba[ili] = uikdi; /* -U(i,k) */ 2319 2320 /* add multiple of row i to k-th row */ 2321 jmin = ili + 1; jmax = bi[i+1]; 2322 if (jmin < jmax){ 2323 for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j]; 2324 /* update il and jl for row i */ 2325 il[i] = jmin; 2326 j = bj[jmin]; jl[i] = jl[j]; jl[j] = i; 2327 } 2328 i = nexti; 2329 } 2330 2331 /* shift the diagonals when zero pivot is detected */ 2332 /* compute rs=sum of abs(off-diagonal) */ 2333 rs = 0.0; 2334 jmin = bi[k]+1; 2335 nz = bi[k+1] - jmin; 2336 bcol = bj + jmin; 2337 for (j=0; j<nz; j++) { 2338 rs += PetscAbsScalar(rtmp[bcol[j]]); 2339 } 2340 2341 sctx.rs = rs; 2342 sctx.pv = dk; 2343 ierr = MatPivotCheck(A,info,&sctx,k);CHKERRQ(ierr); 2344 if (sctx.newshift) break; 2345 dk = sctx.pv; 2346 2347 /* copy data into U(k,:) */ 2348 ba[bi[k]] = 1.0/dk; /* U(k,k) */ 2349 jmin = bi[k]+1; jmax = bi[k+1]; 2350 if (jmin < jmax) { 2351 for (j=jmin; j<jmax; j++){ 2352 col = bj[j]; ba[j] = rtmp[col]; 2353 } 2354 /* add the k-th row into il and jl */ 2355 il[k] = jmin; 2356 i = bj[jmin]; jl[k] = jl[i]; jl[i] = k; 2357 } 2358 } 2359 } while (sctx.newshift); 2360 2361 ierr = PetscFree3(rtmp,il,jl);CHKERRQ(ierr); 2362 ierr = ISRestoreIndices(ip,&rip);CHKERRQ(ierr); 2363 ierr = ISRestoreIndices(iip,&riip);CHKERRQ(ierr); 2364 2365 ierr = ISIdentity(ip,&perm_identity);CHKERRQ(ierr); 2366 if (perm_identity){ 2367 B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2368 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2369 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2370 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace; 2371 } else { 2372 B->ops->solve = MatSolve_SeqSBAIJ_1_inplace; 2373 B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace; 2374 B->ops->forwardsolve = MatForwardSolve_SeqSBAIJ_1_inplace; 2375 B->ops->backwardsolve = MatBackwardSolve_SeqSBAIJ_1_inplace; 2376 } 2377 2378 C->assembled = PETSC_TRUE; 2379 C->preallocated = PETSC_TRUE; 2380 ierr = PetscLogFlops(C->rmap->n);CHKERRQ(ierr); 2381 if (sctx.nshift){ 2382 if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) { 2383 ierr = PetscInfo2(A,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2384 } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { 2385 ierr = PetscInfo2(A,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);CHKERRQ(ierr); 2386 } 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 /* 2392 icc() under revised new data structure. 2393 Factored arrays bj and ba are stored as 2394 U(0,:),...,U(i,:),U(n-1,:) 2395 2396 ui=fact->i is an array of size n+1, in which 2397 ui+ 2398 ui[i]: points to 1st entry of U(i,:),i=0,...,n-1 2399 ui[n]: points to U(n-1,n-1)+1 2400 2401 udiag=fact->diag is an array of size n,in which 2402 udiag[i]: points to diagonal of U(i,:), i=0,...,n-1 2403 2404 U(i,:) contains udiag[i] as its last entry, i.e., 2405 U(i,:) = (u[i,i+1],...,u[i,n-1],diag[i]) 2406 */ 2407 2408 #undef __FUNCT__ 2409 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ" 2410 PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2411 { 2412 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2413 Mat_SeqSBAIJ *b; 2414 PetscErrorCode ierr; 2415 PetscBool perm_identity,missing; 2416 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2417 const PetscInt *rip,*riip; 2418 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2419 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2420 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2421 PetscReal fill=info->fill,levels=info->levels; 2422 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2423 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2424 PetscBT lnkbt; 2425 IS iperm; 2426 2427 PetscFunctionBegin; 2428 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2429 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2430 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2431 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2432 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2433 2434 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2435 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2436 ui[0] = 0; 2437 2438 /* ICC(0) without matrix ordering: simply rearrange column indices */ 2439 if (!levels && perm_identity) { 2440 for (i=0; i<am; i++) { 2441 ncols = ai[i+1] - a->diag[i]; 2442 ui[i+1] = ui[i] + ncols; 2443 udiag[i] = ui[i+1] - 1; /* points to the last entry of U(i,:) */ 2444 } 2445 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2446 cols = uj; 2447 for (i=0; i<am; i++) { 2448 aj = a->j + a->diag[i] + 1; /* 1st entry of U(i,:) without diagonal */ 2449 ncols = ai[i+1] - a->diag[i] -1; 2450 for (j=0; j<ncols; j++) *cols++ = aj[j]; 2451 *cols++ = i; /* diagoanl is located as the last entry of U(i,:) */ 2452 } 2453 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2454 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2455 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2456 2457 /* initialization */ 2458 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2459 2460 /* jl: linked list for storing indices of the pivot rows 2461 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2462 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2463 for (i=0; i<am; i++){ 2464 jl[i] = am; il[i] = 0; 2465 } 2466 2467 /* create and initialize a linked list for storing column indices of the active row k */ 2468 nlnk = am + 1; 2469 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2470 2471 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2472 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2473 current_space = free_space; 2474 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space_lvl);CHKERRQ(ierr); 2475 current_space_lvl = free_space_lvl; 2476 2477 for (k=0; k<am; k++){ /* for each active row k */ 2478 /* initialize lnk by the column indices of row rip[k] of A */ 2479 nzk = 0; 2480 ncols = ai[rip[k]+1] - ai[rip[k]]; 2481 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2482 ncols_upper = 0; 2483 for (j=0; j<ncols; j++){ 2484 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2485 if (riip[i] >= k){ /* only take upper triangular entry */ 2486 ajtmp[ncols_upper] = i; 2487 ncols_upper++; 2488 } 2489 } 2490 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2491 nzk += nlnk; 2492 2493 /* update lnk by computing fill-in for each pivot row to be merged in */ 2494 prow = jl[k]; /* 1st pivot row */ 2495 2496 while (prow < k){ 2497 nextprow = jl[prow]; 2498 2499 /* merge prow into k-th row */ 2500 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2501 jmax = ui[prow+1]; 2502 ncols = jmax-jmin; 2503 i = jmin - ui[prow]; 2504 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2505 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2506 j = *(uj - 1); 2507 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2508 nzk += nlnk; 2509 2510 /* update il and jl for prow */ 2511 if (jmin < jmax){ 2512 il[prow] = jmin; 2513 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2514 } 2515 prow = nextprow; 2516 } 2517 2518 /* if free space is not available, make more free space */ 2519 if (current_space->local_remaining<nzk) { 2520 i = am - k + 1; /* num of unfactored rows */ 2521 i *= PetscMin(nzk, i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2522 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2523 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2524 reallocs++; 2525 } 2526 2527 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2528 if (nzk == 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2529 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2530 2531 /* add the k-th row into il and jl */ 2532 if (nzk > 1){ 2533 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2534 jl[k] = jl[i]; jl[i] = k; 2535 il[k] = ui[k] + 1; 2536 } 2537 uj_ptr[k] = current_space->array; 2538 uj_lvl_ptr[k] = current_space_lvl->array; 2539 2540 current_space->array += nzk; 2541 current_space->local_used += nzk; 2542 current_space->local_remaining -= nzk; 2543 2544 current_space_lvl->array += nzk; 2545 current_space_lvl->local_used += nzk; 2546 current_space_lvl->local_remaining -= nzk; 2547 2548 ui[k+1] = ui[k] + nzk; 2549 } 2550 2551 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2552 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2553 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2554 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2555 2556 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2557 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2558 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2559 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2560 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2561 2562 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2563 2564 /* put together the new matrix in MATSEQSBAIJ format */ 2565 b = (Mat_SeqSBAIJ*)(fact)->data; 2566 b->singlemalloc = PETSC_FALSE; 2567 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2568 b->j = uj; 2569 b->i = ui; 2570 b->diag = udiag; 2571 b->free_diag = PETSC_TRUE; 2572 b->ilen = 0; 2573 b->imax = 0; 2574 b->row = perm; 2575 b->col = perm; 2576 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2577 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2578 b->icol = iperm; 2579 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2580 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2581 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2582 b->maxnz = b->nz = ui[am]; 2583 b->free_a = PETSC_TRUE; 2584 b->free_ij = PETSC_TRUE; 2585 2586 fact->info.factor_mallocs = reallocs; 2587 fact->info.fill_ratio_given = fill; 2588 if (ai[am] != 0) { 2589 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2590 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2591 } else { 2592 fact->info.fill_ratio_needed = 0.0; 2593 } 2594 #if defined(PETSC_USE_INFO) 2595 if (ai[am] != 0) { 2596 PetscReal af = fact->info.fill_ratio_needed; 2597 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2598 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2599 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2600 } else { 2601 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2602 } 2603 #endif 2604 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2605 PetscFunctionReturn(0); 2606 } 2607 2608 #undef __FUNCT__ 2609 #define __FUNCT__ "MatICCFactorSymbolic_SeqAIJ_inplace" 2610 PetscErrorCode MatICCFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2611 { 2612 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2613 Mat_SeqSBAIJ *b; 2614 PetscErrorCode ierr; 2615 PetscBool perm_identity,missing; 2616 PetscInt reallocs=0,i,*ai=a->i,*aj=a->j,am=A->rmap->n,*ui,*udiag; 2617 const PetscInt *rip,*riip; 2618 PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow; 2619 PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL,d; 2620 PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr; 2621 PetscReal fill=info->fill,levels=info->levels; 2622 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2623 PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL; 2624 PetscBT lnkbt; 2625 IS iperm; 2626 2627 PetscFunctionBegin; 2628 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2629 ierr = MatMissingDiagonal(A,&missing,&d);CHKERRQ(ierr); 2630 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d); 2631 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2632 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2633 2634 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2635 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2636 ui[0] = 0; 2637 2638 /* ICC(0) without matrix ordering: simply copies fill pattern */ 2639 if (!levels && perm_identity) { 2640 2641 for (i=0; i<am; i++) { 2642 ui[i+1] = ui[i] + ai[i+1] - a->diag[i]; 2643 udiag[i] = ui[i]; 2644 } 2645 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2646 cols = uj; 2647 for (i=0; i<am; i++) { 2648 aj = a->j + a->diag[i]; 2649 ncols = ui[i+1] - ui[i]; 2650 for (j=0; j<ncols; j++) *cols++ = *aj++; 2651 } 2652 } else { /* case: levels>0 || (levels=0 && !perm_identity) */ 2653 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2654 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2655 2656 /* initialization */ 2657 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);CHKERRQ(ierr); 2658 2659 /* jl: linked list for storing indices of the pivot rows 2660 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2661 ierr = PetscMalloc4(am,PetscInt*,&uj_ptr,am,PetscInt*,&uj_lvl_ptr,am,PetscInt,&jl,am,PetscInt,&il);CHKERRQ(ierr); 2662 for (i=0; i<am; i++){ 2663 jl[i] = am; il[i] = 0; 2664 } 2665 2666 /* create and initialize a linked list for storing column indices of the active row k */ 2667 nlnk = am + 1; 2668 ierr = PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2669 2670 /* initial FreeSpace size is fill*(ai[am]+1) */ 2671 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 2672 current_space = free_space; 2673 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);CHKERRQ(ierr); 2674 current_space_lvl = free_space_lvl; 2675 2676 for (k=0; k<am; k++){ /* for each active row k */ 2677 /* initialize lnk by the column indices of row rip[k] of A */ 2678 nzk = 0; 2679 ncols = ai[rip[k]+1] - ai[rip[k]]; 2680 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2681 ncols_upper = 0; 2682 for (j=0; j<ncols; j++){ 2683 i = *(aj + ai[rip[k]] + j); /* unpermuted column index */ 2684 if (riip[i] >= k){ /* only take upper triangular entry */ 2685 ajtmp[ncols_upper] = i; 2686 ncols_upper++; 2687 } 2688 } 2689 ierr = PetscIncompleteLLInit(ncols_upper,ajtmp,am,riip,nlnk,lnk,lnk_lvl,lnkbt);CHKERRQ(ierr); 2690 nzk += nlnk; 2691 2692 /* update lnk by computing fill-in for each pivot row to be merged in */ 2693 prow = jl[k]; /* 1st pivot row */ 2694 2695 while (prow < k){ 2696 nextprow = jl[prow]; 2697 2698 /* merge prow into k-th row */ 2699 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2700 jmax = ui[prow+1]; 2701 ncols = jmax-jmin; 2702 i = jmin - ui[prow]; 2703 cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2704 uj = uj_lvl_ptr[prow] + i; /* levels of cols */ 2705 j = *(uj - 1); 2706 ierr = PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);CHKERRQ(ierr); 2707 nzk += nlnk; 2708 2709 /* update il and jl for prow */ 2710 if (jmin < jmax){ 2711 il[prow] = jmin; 2712 j = *cols; jl[prow] = jl[j]; jl[j] = prow; 2713 } 2714 prow = nextprow; 2715 } 2716 2717 /* if free space is not available, make more free space */ 2718 if (current_space->local_remaining<nzk) { 2719 i = am - k + 1; /* num of unfactored rows */ 2720 i *= PetscMin(nzk, (i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2721 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2722 ierr = PetscFreeSpaceGet(i,¤t_space_lvl);CHKERRQ(ierr); 2723 reallocs++; 2724 } 2725 2726 /* copy data into free_space and free_space_lvl, then initialize lnk */ 2727 if (!nzk) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Empty row %D in ICC matrix factor",k); 2728 ierr = PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);CHKERRQ(ierr); 2729 2730 /* add the k-th row into il and jl */ 2731 if (nzk > 1){ 2732 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2733 jl[k] = jl[i]; jl[i] = k; 2734 il[k] = ui[k] + 1; 2735 } 2736 uj_ptr[k] = current_space->array; 2737 uj_lvl_ptr[k] = current_space_lvl->array; 2738 2739 current_space->array += nzk; 2740 current_space->local_used += nzk; 2741 current_space->local_remaining -= nzk; 2742 2743 current_space_lvl->array += nzk; 2744 current_space_lvl->local_used += nzk; 2745 current_space_lvl->local_remaining -= nzk; 2746 2747 ui[k+1] = ui[k] + nzk; 2748 } 2749 2750 #if defined(PETSC_USE_INFO) 2751 if (ai[am] != 0) { 2752 PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]); 2753 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2754 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2755 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2756 } else { 2757 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2758 } 2759 #endif 2760 2761 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2762 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2763 ierr = PetscFree4(uj_ptr,uj_lvl_ptr,jl,il);CHKERRQ(ierr); 2764 ierr = PetscFree(ajtmp);CHKERRQ(ierr); 2765 2766 /* destroy list of free space and other temporary array(s) */ 2767 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2768 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 2769 ierr = PetscIncompleteLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2770 ierr = PetscFreeSpaceDestroy(free_space_lvl);CHKERRQ(ierr); 2771 2772 } /* end of case: levels>0 || (levels=0 && !perm_identity) */ 2773 2774 /* put together the new matrix in MATSEQSBAIJ format */ 2775 2776 b = (Mat_SeqSBAIJ*)fact->data; 2777 b->singlemalloc = PETSC_FALSE; 2778 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2779 b->j = uj; 2780 b->i = ui; 2781 b->diag = udiag; 2782 b->free_diag = PETSC_TRUE; 2783 b->ilen = 0; 2784 b->imax = 0; 2785 b->row = perm; 2786 b->col = perm; 2787 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2788 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2789 b->icol = iperm; 2790 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2791 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2792 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2793 b->maxnz = b->nz = ui[am]; 2794 b->free_a = PETSC_TRUE; 2795 b->free_ij = PETSC_TRUE; 2796 2797 fact->info.factor_mallocs = reallocs; 2798 fact->info.fill_ratio_given = fill; 2799 if (ai[am] != 0) { 2800 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 2801 } else { 2802 fact->info.fill_ratio_needed = 0.0; 2803 } 2804 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 #undef __FUNCT__ 2809 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ" 2810 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2811 { 2812 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2813 Mat_SeqSBAIJ *b; 2814 PetscErrorCode ierr; 2815 PetscBool perm_identity; 2816 PetscReal fill = info->fill; 2817 const PetscInt *rip,*riip; 2818 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2819 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2820 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr,*udiag; 2821 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2822 PetscBT lnkbt; 2823 IS iperm; 2824 2825 PetscFunctionBegin; 2826 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2827 /* check whether perm is the identity mapping */ 2828 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2829 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2830 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2831 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2832 2833 /* initialization */ 2834 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2835 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&udiag);CHKERRQ(ierr); 2836 ui[0] = 0; 2837 2838 /* jl: linked list for storing indices of the pivot rows 2839 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 2840 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 2841 for (i=0; i<am; i++){ 2842 jl[i] = am; il[i] = 0; 2843 } 2844 2845 /* create and initialize a linked list for storing column indices of the active row k */ 2846 nlnk = am + 1; 2847 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2848 2849 /* initial FreeSpace size is fill*(ai[am]+am)/2 */ 2850 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+am)/2),&free_space);CHKERRQ(ierr); 2851 current_space = free_space; 2852 2853 for (k=0; k<am; k++){ /* for each active row k */ 2854 /* initialize lnk by the column indices of row rip[k] of A */ 2855 nzk = 0; 2856 ncols = ai[rip[k]+1] - ai[rip[k]]; 2857 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 2858 ncols_upper = 0; 2859 for (j=0; j<ncols; j++){ 2860 i = riip[*(aj + ai[rip[k]] + j)]; 2861 if (i >= k){ /* only take upper triangular entry */ 2862 cols[ncols_upper] = i; 2863 ncols_upper++; 2864 } 2865 } 2866 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2867 nzk += nlnk; 2868 2869 /* update lnk by computing fill-in for each pivot row to be merged in */ 2870 prow = jl[k]; /* 1st pivot row */ 2871 2872 while (prow < k){ 2873 nextprow = jl[prow]; 2874 /* merge prow into k-th row */ 2875 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 2876 jmax = ui[prow+1]; 2877 ncols = jmax-jmin; 2878 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 2879 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 2880 nzk += nlnk; 2881 2882 /* update il and jl for prow */ 2883 if (jmin < jmax){ 2884 il[prow] = jmin; 2885 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 2886 } 2887 prow = nextprow; 2888 } 2889 2890 /* if free space is not available, make more free space */ 2891 if (current_space->local_remaining<nzk) { 2892 i = am - k + 1; /* num of unfactored rows */ 2893 i *= PetscMin(nzk,i-1); /* i*nzk, i*(i-1): estimated and max additional space needed */ 2894 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 2895 reallocs++; 2896 } 2897 2898 /* copy data into free space, then initialize lnk */ 2899 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 2900 2901 /* add the k-th row into il and jl */ 2902 if (nzk > 1){ 2903 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 2904 jl[k] = jl[i]; jl[i] = k; 2905 il[k] = ui[k] + 1; 2906 } 2907 ui_ptr[k] = current_space->array; 2908 current_space->array += nzk; 2909 current_space->local_used += nzk; 2910 current_space->local_remaining -= nzk; 2911 2912 ui[k+1] = ui[k] + nzk; 2913 } 2914 2915 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 2916 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 2917 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 2918 2919 /* copy free_space into uj and free free_space; set ui, uj, udiag in new datastructure; */ 2920 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 2921 ierr = PetscFreeSpaceContiguous_Cholesky(&free_space,uj,am,ui,udiag);CHKERRQ(ierr); /* store matrix factor */ 2922 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 2923 2924 /* put together the new matrix in MATSEQSBAIJ format */ 2925 2926 b = (Mat_SeqSBAIJ*)fact->data; 2927 b->singlemalloc = PETSC_FALSE; 2928 b->free_a = PETSC_TRUE; 2929 b->free_ij = PETSC_TRUE; 2930 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 2931 b->j = uj; 2932 b->i = ui; 2933 b->diag = udiag; 2934 b->free_diag = PETSC_TRUE; 2935 b->ilen = 0; 2936 b->imax = 0; 2937 b->row = perm; 2938 b->col = perm; 2939 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2940 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 2941 b->icol = iperm; 2942 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 2943 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 2944 ierr = PetscLogObjectMemory(fact,ui[am]*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 2945 b->maxnz = b->nz = ui[am]; 2946 2947 fact->info.factor_mallocs = reallocs; 2948 fact->info.fill_ratio_given = fill; 2949 if (ai[am] != 0) { 2950 /* nonzeros in lower triangular part of A (including diagonals) = (ai[am]+am)/2 */ 2951 fact->info.fill_ratio_needed = ((PetscReal)2*ui[am])/(ai[am]+am); 2952 } else { 2953 fact->info.fill_ratio_needed = 0.0; 2954 } 2955 #if defined(PETSC_USE_INFO) 2956 if (ai[am] != 0) { 2957 PetscReal af = fact->info.fill_ratio_needed; 2958 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 2959 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 2960 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 2961 } else { 2962 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 2963 } 2964 #endif 2965 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ; 2966 PetscFunctionReturn(0); 2967 } 2968 2969 #undef __FUNCT__ 2970 #define __FUNCT__ "MatCholeskyFactorSymbolic_SeqAIJ_inplace" 2971 PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info) 2972 { 2973 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 2974 Mat_SeqSBAIJ *b; 2975 PetscErrorCode ierr; 2976 PetscBool perm_identity; 2977 PetscReal fill = info->fill; 2978 const PetscInt *rip,*riip; 2979 PetscInt i,am=A->rmap->n,*ai=a->i,*aj=a->j,reallocs=0,prow; 2980 PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow; 2981 PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr; 2982 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2983 PetscBT lnkbt; 2984 IS iperm; 2985 2986 PetscFunctionBegin; 2987 if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n); 2988 /* check whether perm is the identity mapping */ 2989 ierr = ISIdentity(perm,&perm_identity);CHKERRQ(ierr); 2990 ierr = ISInvertPermutation(perm,PETSC_DECIDE,&iperm);CHKERRQ(ierr); 2991 ierr = ISGetIndices(iperm,&riip);CHKERRQ(ierr); 2992 ierr = ISGetIndices(perm,&rip);CHKERRQ(ierr); 2993 2994 /* initialization */ 2995 ierr = PetscMalloc((am+1)*sizeof(PetscInt),&ui);CHKERRQ(ierr); 2996 ui[0] = 0; 2997 2998 /* jl: linked list for storing indices of the pivot rows 2999 il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */ 3000 ierr = PetscMalloc4(am,PetscInt*,&ui_ptr,am,PetscInt,&jl,am,PetscInt,&il,am,PetscInt,&cols);CHKERRQ(ierr); 3001 for (i=0; i<am; i++){ 3002 jl[i] = am; il[i] = 0; 3003 } 3004 3005 /* create and initialize a linked list for storing column indices of the active row k */ 3006 nlnk = am + 1; 3007 ierr = PetscLLCreate(am,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3008 3009 /* initial FreeSpace size is fill*(ai[am]+1) */ 3010 ierr = PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);CHKERRQ(ierr); 3011 current_space = free_space; 3012 3013 for (k=0; k<am; k++){ /* for each active row k */ 3014 /* initialize lnk by the column indices of row rip[k] of A */ 3015 nzk = 0; 3016 ncols = ai[rip[k]+1] - ai[rip[k]]; 3017 if (!ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",rip[k],k); 3018 ncols_upper = 0; 3019 for (j=0; j<ncols; j++){ 3020 i = riip[*(aj + ai[rip[k]] + j)]; 3021 if (i >= k){ /* only take upper triangular entry */ 3022 cols[ncols_upper] = i; 3023 ncols_upper++; 3024 } 3025 } 3026 ierr = PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3027 nzk += nlnk; 3028 3029 /* update lnk by computing fill-in for each pivot row to be merged in */ 3030 prow = jl[k]; /* 1st pivot row */ 3031 3032 while (prow < k){ 3033 nextprow = jl[prow]; 3034 /* merge prow into k-th row */ 3035 jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */ 3036 jmax = ui[prow+1]; 3037 ncols = jmax-jmin; 3038 uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */ 3039 ierr = PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3040 nzk += nlnk; 3041 3042 /* update il and jl for prow */ 3043 if (jmin < jmax){ 3044 il[prow] = jmin; 3045 j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow; 3046 } 3047 prow = nextprow; 3048 } 3049 3050 /* if free space is not available, make more free space */ 3051 if (current_space->local_remaining<nzk) { 3052 i = am - k + 1; /* num of unfactored rows */ 3053 i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */ 3054 ierr = PetscFreeSpaceGet(i,¤t_space);CHKERRQ(ierr); 3055 reallocs++; 3056 } 3057 3058 /* copy data into free space, then initialize lnk */ 3059 ierr = PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 3060 3061 /* add the k-th row into il and jl */ 3062 if (nzk-1 > 0){ 3063 i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */ 3064 jl[k] = jl[i]; jl[i] = k; 3065 il[k] = ui[k] + 1; 3066 } 3067 ui_ptr[k] = current_space->array; 3068 current_space->array += nzk; 3069 current_space->local_used += nzk; 3070 current_space->local_remaining -= nzk; 3071 3072 ui[k+1] = ui[k] + nzk; 3073 } 3074 3075 #if defined(PETSC_USE_INFO) 3076 if (ai[am] != 0) { 3077 PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]); 3078 ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);CHKERRQ(ierr); 3079 ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr); 3080 ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);CHKERRQ(ierr); 3081 } else { 3082 ierr = PetscInfo(A,"Empty matrix.\n");CHKERRQ(ierr); 3083 } 3084 #endif 3085 3086 ierr = ISRestoreIndices(perm,&rip);CHKERRQ(ierr); 3087 ierr = ISRestoreIndices(iperm,&riip);CHKERRQ(ierr); 3088 ierr = PetscFree4(ui_ptr,jl,il,cols);CHKERRQ(ierr); 3089 3090 /* destroy list of free space and other temporary array(s) */ 3091 ierr = PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);CHKERRQ(ierr); 3092 ierr = PetscFreeSpaceContiguous(&free_space,uj);CHKERRQ(ierr); 3093 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3094 3095 /* put together the new matrix in MATSEQSBAIJ format */ 3096 3097 b = (Mat_SeqSBAIJ*)fact->data; 3098 b->singlemalloc = PETSC_FALSE; 3099 b->free_a = PETSC_TRUE; 3100 b->free_ij = PETSC_TRUE; 3101 ierr = PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);CHKERRQ(ierr); 3102 b->j = uj; 3103 b->i = ui; 3104 b->diag = 0; 3105 b->ilen = 0; 3106 b->imax = 0; 3107 b->row = perm; 3108 b->col = perm; 3109 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3110 ierr = PetscObjectReference((PetscObject)perm);CHKERRQ(ierr); 3111 b->icol = iperm; 3112 b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */ 3113 ierr = PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3114 ierr = PetscLogObjectMemory(fact,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3115 b->maxnz = b->nz = ui[am]; 3116 3117 fact->info.factor_mallocs = reallocs; 3118 fact->info.fill_ratio_given = fill; 3119 if (ai[am] != 0) { 3120 fact->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]); 3121 } else { 3122 fact->info.fill_ratio_needed = 0.0; 3123 } 3124 fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ_inplace; 3125 PetscFunctionReturn(0); 3126 } 3127 3128 #undef __FUNCT__ 3129 #define __FUNCT__ "MatSolve_SeqAIJ_NaturalOrdering" 3130 PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx) 3131 { 3132 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3133 PetscErrorCode ierr; 3134 PetscInt n = A->rmap->n; 3135 const PetscInt *ai = a->i,*aj = a->j,*adiag = a->diag,*vi; 3136 PetscScalar *x,sum; 3137 const PetscScalar *b; 3138 const MatScalar *aa = a->a,*v; 3139 PetscInt i,nz; 3140 3141 PetscFunctionBegin; 3142 if (!n) PetscFunctionReturn(0); 3143 3144 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3145 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3146 3147 /* forward solve the lower triangular */ 3148 x[0] = b[0]; 3149 v = aa; 3150 vi = aj; 3151 for (i=1; i<n; i++) { 3152 nz = ai[i+1] - ai[i]; 3153 sum = b[i]; 3154 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3155 v += nz; 3156 vi += nz; 3157 x[i] = sum; 3158 } 3159 3160 /* backward solve the upper triangular */ 3161 for (i=n-1; i>=0; i--){ 3162 v = aa + adiag[i+1] + 1; 3163 vi = aj + adiag[i+1] + 1; 3164 nz = adiag[i] - adiag[i+1]-1; 3165 sum = x[i]; 3166 PetscSparseDenseMinusDot(sum,x,v,vi,nz); 3167 x[i] = sum*v[nz]; /* x[i]=aa[adiag[i]]*sum; v++; */ 3168 } 3169 3170 ierr = PetscLogFlops(2.0*a->nz - A->cmap->n);CHKERRQ(ierr); 3171 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3172 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3173 PetscFunctionReturn(0); 3174 } 3175 3176 #undef __FUNCT__ 3177 #define __FUNCT__ "MatSolve_SeqAIJ" 3178 PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx) 3179 { 3180 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 3181 IS iscol = a->col,isrow = a->row; 3182 PetscErrorCode ierr; 3183 PetscInt i,n=A->rmap->n,*vi,*ai=a->i,*aj=a->j,*adiag = a->diag,nz; 3184 const PetscInt *rout,*cout,*r,*c; 3185 PetscScalar *x,*tmp,sum; 3186 const PetscScalar *b; 3187 const MatScalar *aa = a->a,*v; 3188 3189 PetscFunctionBegin; 3190 if (!n) PetscFunctionReturn(0); 3191 3192 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 3193 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 3194 tmp = a->solve_work; 3195 3196 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 3197 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 3198 3199 /* forward solve the lower triangular */ 3200 tmp[0] = b[r[0]]; 3201 v = aa; 3202 vi = aj; 3203 for (i=1; i<n; i++) { 3204 nz = ai[i+1] - ai[i]; 3205 sum = b[r[i]]; 3206 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3207 tmp[i] = sum; 3208 v += nz; vi += nz; 3209 } 3210 3211 /* backward solve the upper triangular */ 3212 for (i=n-1; i>=0; i--){ 3213 v = aa + adiag[i+1]+1; 3214 vi = aj + adiag[i+1]+1; 3215 nz = adiag[i]-adiag[i+1]-1; 3216 sum = tmp[i]; 3217 PetscSparseDenseMinusDot(sum,tmp,v,vi,nz); 3218 x[c[i]] = tmp[i] = sum*v[nz]; /* v[nz] = aa[adiag[i]] */ 3219 } 3220 3221 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 3222 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 3223 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 3224 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 3225 ierr = PetscLogFlops(2*a->nz - A->cmap->n);CHKERRQ(ierr); 3226 PetscFunctionReturn(0); 3227 } 3228 3229 #undef __FUNCT__ 3230 #define __FUNCT__ "MatILUDTFactor_SeqAIJ" 3231 /* 3232 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3233 */ 3234 PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,const MatFactorInfo *info,Mat *fact) 3235 { 3236 Mat B = *fact; 3237 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b; 3238 IS isicol; 3239 PetscErrorCode ierr; 3240 const PetscInt *r,*ic; 3241 PetscInt i,n=A->rmap->n,*ai=a->i,*aj=a->j,*ajtmp,*adiag; 3242 PetscInt *bi,*bj,*bdiag,*bdiag_rev; 3243 PetscInt row,nzi,nzi_bl,nzi_bu,*im,nzi_al,nzi_au; 3244 PetscInt nlnk,*lnk; 3245 PetscBT lnkbt; 3246 PetscBool row_identity,icol_identity; 3247 MatScalar *aatmp,*pv,*batmp,*ba,*rtmp,*pc,multiplier,*vtmp,diag_tmp; 3248 const PetscInt *ics; 3249 PetscInt j,nz,*pj,*bjtmp,k,ncut,*jtmp; 3250 PetscReal dt=info->dt,dtcol=info->dtcol,shift=info->shiftamount; 3251 PetscInt dtcount=(PetscInt)info->dtcount,nnz_max; 3252 PetscBool missing; 3253 3254 PetscFunctionBegin; 3255 3256 if (dt == PETSC_DEFAULT) dt = 0.005; 3257 if (dtcol == PETSC_DEFAULT) dtcol = 0.01; /* XXX unused! */ 3258 if (dtcount == PETSC_DEFAULT) dtcount = (PetscInt)(1.5*a->rmax); 3259 3260 /* ------- symbolic factorization, can be reused ---------*/ 3261 ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr); 3262 if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i); 3263 adiag=a->diag; 3264 3265 ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr); 3266 3267 /* bdiag is location of diagonal in factor */ 3268 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr); /* becomes b->diag */ 3269 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag_rev);CHKERRQ(ierr); /* temporary */ 3270 3271 /* allocate row pointers bi */ 3272 ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr); 3273 3274 /* allocate bj and ba; max num of nonzero entries is (ai[n]+2*n*dtcount+2) */ 3275 if (dtcount > n-1) dtcount = n-1; /* diagonal is excluded */ 3276 nnz_max = ai[n]+2*n*dtcount+2; 3277 3278 ierr = PetscMalloc((nnz_max+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr); 3279 ierr = PetscMalloc((nnz_max+1)*sizeof(MatScalar),&ba);CHKERRQ(ierr); 3280 3281 /* put together the new matrix */ 3282 ierr = MatSeqAIJSetPreallocation_SeqAIJ(B,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr); 3283 ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr); 3284 b = (Mat_SeqAIJ*)B->data; 3285 b->free_a = PETSC_TRUE; 3286 b->free_ij = PETSC_TRUE; 3287 b->singlemalloc = PETSC_FALSE; 3288 b->a = ba; 3289 b->j = bj; 3290 b->i = bi; 3291 b->diag = bdiag; 3292 b->ilen = 0; 3293 b->imax = 0; 3294 b->row = isrow; 3295 b->col = iscol; 3296 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 3297 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 3298 b->icol = isicol; 3299 ierr = PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr); 3300 3301 ierr = PetscLogObjectMemory(B,nnz_max*(sizeof(PetscInt)+sizeof(MatScalar)));CHKERRQ(ierr); 3302 b->maxnz = nnz_max; 3303 3304 B->factortype = MAT_FACTOR_ILUDT; 3305 B->info.factor_mallocs = 0; 3306 B->info.fill_ratio_given = ((PetscReal)nnz_max)/((PetscReal)ai[n]); 3307 CHKMEMQ; 3308 /* ------- end of symbolic factorization ---------*/ 3309 3310 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3311 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3312 ics = ic; 3313 3314 /* linked list for storing column indices of the active row */ 3315 nlnk = n + 1; 3316 ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3317 3318 /* im: used by PetscLLAddSortedLU(); jtmp: working array for column indices of active row */ 3319 ierr = PetscMalloc2(n,PetscInt,&im,n,PetscInt,&jtmp);CHKERRQ(ierr); 3320 /* rtmp, vtmp: working arrays for sparse and contiguous row entries of active row */ 3321 ierr = PetscMalloc2(n,MatScalar,&rtmp,n,MatScalar,&vtmp);CHKERRQ(ierr); 3322 ierr = PetscMemzero(rtmp,n*sizeof(MatScalar));CHKERRQ(ierr); 3323 3324 bi[0] = 0; 3325 bdiag[0] = nnz_max-1; /* location of diag[0] in factor B */ 3326 bdiag_rev[n] = bdiag[0]; 3327 bi[2*n+1] = bdiag[0]+1; /* endof bj and ba array */ 3328 for (i=0; i<n; i++) { 3329 /* copy initial fill into linked list */ 3330 nzi = 0; /* nonzeros for active row i */ 3331 nzi = ai[r[i]+1] - ai[r[i]]; 3332 if (!nzi) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i); 3333 nzi_al = adiag[r[i]] - ai[r[i]]; 3334 nzi_au = ai[r[i]+1] - adiag[r[i]] -1; 3335 ajtmp = aj + ai[r[i]]; 3336 ierr = PetscLLAddPerm(nzi,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr); 3337 3338 /* load in initial (unfactored row) */ 3339 aatmp = a->a + ai[r[i]]; 3340 for (j=0; j<nzi; j++) { 3341 rtmp[ics[*ajtmp++]] = *aatmp++; 3342 } 3343 3344 /* add pivot rows into linked list */ 3345 row = lnk[n]; 3346 while (row < i ) { 3347 nzi_bl = bi[row+1] - bi[row] + 1; 3348 bjtmp = bj + bdiag[row+1]+1; /* points to 1st column next to the diagonal in U */ 3349 ierr = PetscLLAddSortedLU(bjtmp,row,nlnk,lnk,lnkbt,i,nzi_bl,im);CHKERRQ(ierr); 3350 nzi += nlnk; 3351 row = lnk[row]; 3352 } 3353 3354 /* copy data from lnk into jtmp, then initialize lnk */ 3355 ierr = PetscLLClean(n,n,nzi,lnk,jtmp,lnkbt);CHKERRQ(ierr); 3356 3357 /* numerical factorization */ 3358 bjtmp = jtmp; 3359 row = *bjtmp++; /* 1st pivot row */ 3360 while ( row < i ) { 3361 pc = rtmp + row; 3362 pv = ba + bdiag[row]; /* 1./(diag of the pivot row) */ 3363 multiplier = (*pc) * (*pv); 3364 *pc = multiplier; 3365 if (PetscAbsScalar(*pc) > dt){ /* apply tolerance dropping rule */ 3366 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3367 pv = ba + bdiag[row+1] + 1; 3368 /* if (multiplier < -1.0 or multiplier >1.0) printf("row/prow %d, %d, multiplier %g\n",i,row,multiplier); */ 3369 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3370 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3371 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3372 } 3373 row = *bjtmp++; 3374 } 3375 3376 /* copy sparse rtmp into contiguous vtmp; separate L and U part */ 3377 diag_tmp = rtmp[i]; /* save diagonal value - may not needed?? */ 3378 nzi_bl = 0; j = 0; 3379 while (jtmp[j] < i){ /* Note: jtmp is sorted */ 3380 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3381 nzi_bl++; j++; 3382 } 3383 nzi_bu = nzi - nzi_bl -1; 3384 while (j < nzi){ 3385 vtmp[j] = rtmp[jtmp[j]]; rtmp[jtmp[j]]=0.0; 3386 j++; 3387 } 3388 3389 bjtmp = bj + bi[i]; 3390 batmp = ba + bi[i]; 3391 /* apply level dropping rule to L part */ 3392 ncut = nzi_al + dtcount; 3393 if (ncut < nzi_bl){ 3394 ierr = PetscSortSplit(ncut,nzi_bl,vtmp,jtmp);CHKERRQ(ierr); 3395 ierr = PetscSortIntWithScalarArray(ncut,jtmp,vtmp);CHKERRQ(ierr); 3396 } else { 3397 ncut = nzi_bl; 3398 } 3399 for (j=0; j<ncut; j++){ 3400 bjtmp[j] = jtmp[j]; 3401 batmp[j] = vtmp[j]; 3402 /* printf(" (%d,%g),",bjtmp[j],batmp[j]); */ 3403 } 3404 bi[i+1] = bi[i] + ncut; 3405 nzi = ncut + 1; 3406 3407 /* apply level dropping rule to U part */ 3408 ncut = nzi_au + dtcount; 3409 if (ncut < nzi_bu){ 3410 ierr = PetscSortSplit(ncut,nzi_bu,vtmp+nzi_bl+1,jtmp+nzi_bl+1);CHKERRQ(ierr); 3411 ierr = PetscSortIntWithScalarArray(ncut,jtmp+nzi_bl+1,vtmp+nzi_bl+1);CHKERRQ(ierr); 3412 } else { 3413 ncut = nzi_bu; 3414 } 3415 nzi += ncut; 3416 3417 /* mark bdiagonal */ 3418 bdiag[i+1] = bdiag[i] - (ncut + 1); 3419 bdiag_rev[n-i-1] = bdiag[i+1]; 3420 bi[2*n - i] = bi[2*n - i +1] - (ncut + 1); 3421 bjtmp = bj + bdiag[i]; 3422 batmp = ba + bdiag[i]; 3423 *bjtmp = i; 3424 *batmp = diag_tmp; /* rtmp[i]; */ 3425 if (*batmp == 0.0) { 3426 *batmp = dt+shift; 3427 /* printf(" row %d add shift %g\n",i,shift); */ 3428 } 3429 *batmp = 1.0/(*batmp); /* invert diagonal entries for simplier triangular solves */ 3430 /* printf(" (%d,%g),",*bjtmp,*batmp); */ 3431 3432 bjtmp = bj + bdiag[i+1]+1; 3433 batmp = ba + bdiag[i+1]+1; 3434 for (k=0; k<ncut; k++){ 3435 bjtmp[k] = jtmp[nzi_bl+1+k]; 3436 batmp[k] = vtmp[nzi_bl+1+k]; 3437 /* printf(" (%d,%g),",bjtmp[k],batmp[k]); */ 3438 } 3439 /* printf("\n"); */ 3440 3441 im[i] = nzi; /* used by PetscLLAddSortedLU() */ 3442 /* 3443 printf("row %d: bi %d, bdiag %d\n",i,bi[i],bdiag[i]); 3444 printf(" ----------------------------\n"); 3445 */ 3446 } /* for (i=0; i<n; i++) */ 3447 /* printf("end of L %d, beginning of U %d\n",bi[n],bdiag[n]); */ 3448 if (bi[n] >= bdiag[n]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"end of L array %d cannot >= the beginning of U array %d",bi[n],bdiag[n]); 3449 3450 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3451 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3452 3453 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 3454 ierr = PetscFree2(im,jtmp);CHKERRQ(ierr); 3455 ierr = PetscFree2(rtmp,vtmp);CHKERRQ(ierr); 3456 ierr = PetscFree(bdiag_rev);CHKERRQ(ierr); 3457 3458 ierr = PetscLogFlops(B->cmap->n);CHKERRQ(ierr); 3459 b->maxnz = b->nz = bi[n] + bdiag[0] - bdiag[n]; 3460 3461 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3462 ierr = ISIdentity(isicol,&icol_identity);CHKERRQ(ierr); 3463 if (row_identity && icol_identity) { 3464 B->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3465 } else { 3466 B->ops->solve = MatSolve_SeqAIJ; 3467 } 3468 3469 B->ops->solveadd = 0; 3470 B->ops->solvetranspose = 0; 3471 B->ops->solvetransposeadd = 0; 3472 B->ops->matsolve = 0; 3473 B->assembled = PETSC_TRUE; 3474 B->preallocated = PETSC_TRUE; 3475 PetscFunctionReturn(0); 3476 } 3477 3478 /* a wraper of MatILUDTFactor_SeqAIJ() */ 3479 #undef __FUNCT__ 3480 #define __FUNCT__ "MatILUDTFactorSymbolic_SeqAIJ" 3481 /* 3482 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3483 */ 3484 3485 PetscErrorCode MatILUDTFactorSymbolic_SeqAIJ(Mat fact,Mat A,IS row,IS col,const MatFactorInfo *info) 3486 { 3487 PetscErrorCode ierr; 3488 3489 PetscFunctionBegin; 3490 ierr = MatILUDTFactor_SeqAIJ(A,row,col,info,&fact);CHKERRQ(ierr); 3491 PetscFunctionReturn(0); 3492 } 3493 3494 /* 3495 same as MatLUFactorNumeric_SeqAIJ(), except using contiguous array matrix factors 3496 - intend to replace existing MatLUFactorNumeric_SeqAIJ() 3497 */ 3498 #undef __FUNCT__ 3499 #define __FUNCT__ "MatILUDTFactorNumeric_SeqAIJ" 3500 /* 3501 This will get a new name and become a varient of MatILUFactor_SeqAIJ() there is no longer seperate functions in the matrix function table for dt factors 3502 */ 3503 3504 PetscErrorCode MatILUDTFactorNumeric_SeqAIJ(Mat fact,Mat A,const MatFactorInfo *info) 3505 { 3506 Mat C=fact; 3507 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data; 3508 IS isrow = b->row,isicol = b->icol; 3509 PetscErrorCode ierr; 3510 const PetscInt *r,*ic,*ics; 3511 PetscInt i,j,k,n=A->rmap->n,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 3512 PetscInt *ajtmp,*bjtmp,nz,nzl,nzu,row,*bdiag = b->diag,*pj; 3513 MatScalar *rtmp,*pc,multiplier,*v,*pv,*aa=a->a; 3514 PetscReal dt=info->dt,shift=info->shiftamount; 3515 PetscBool row_identity, col_identity; 3516 3517 PetscFunctionBegin; 3518 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 3519 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 3520 ierr = PetscMalloc((n+1)*sizeof(MatScalar),&rtmp);CHKERRQ(ierr); 3521 ics = ic; 3522 3523 for (i=0; i<n; i++){ 3524 /* initialize rtmp array */ 3525 nzl = bi[i+1] - bi[i]; /* num of nozeros in L(i,:) */ 3526 bjtmp = bj + bi[i]; 3527 for (j=0; j<nzl; j++) rtmp[*bjtmp++] = 0.0; 3528 rtmp[i] = 0.0; 3529 nzu = bdiag[i] - bdiag[i+1]; /* num of nozeros in U(i,:) */ 3530 bjtmp = bj + bdiag[i+1] + 1; 3531 for (j=0; j<nzu; j++) rtmp[*bjtmp++] = 0.0; 3532 3533 /* load in initial unfactored row of A */ 3534 /* printf("row %d\n",i); */ 3535 nz = ai[r[i]+1] - ai[r[i]]; 3536 ajtmp = aj + ai[r[i]]; 3537 v = aa + ai[r[i]]; 3538 for (j=0; j<nz; j++) { 3539 rtmp[ics[*ajtmp++]] = v[j]; 3540 /* printf(" (%d,%g),",ics[ajtmp[j]],rtmp[ics[ajtmp[j]]]); */ 3541 } 3542 /* printf("\n"); */ 3543 3544 /* numerical factorization */ 3545 bjtmp = bj + bi[i]; /* point to 1st entry of L(i,:) */ 3546 nzl = bi[i+1] - bi[i]; /* num of entries in L(i,:) */ 3547 k = 0; 3548 while (k < nzl){ 3549 row = *bjtmp++; 3550 /* printf(" prow %d\n",row); */ 3551 pc = rtmp + row; 3552 pv = b->a + bdiag[row]; /* 1./(diag of the pivot row) */ 3553 multiplier = (*pc) * (*pv); 3554 *pc = multiplier; 3555 if (PetscAbsScalar(multiplier) > dt){ 3556 pj = bj + bdiag[row+1] + 1; /* point to 1st entry of U(row,:) */ 3557 pv = b->a + bdiag[row+1] + 1; 3558 nz = bdiag[row] - bdiag[row+1] - 1; /* num of entries in U(row,:), excluding diagonal */ 3559 for (j=0; j<nz; j++) rtmp[*pj++] -= multiplier * (*pv++); 3560 ierr = PetscLogFlops(1+2*nz);CHKERRQ(ierr); 3561 } 3562 k++; 3563 } 3564 3565 /* finished row so stick it into b->a */ 3566 /* L-part */ 3567 pv = b->a + bi[i] ; 3568 pj = bj + bi[i] ; 3569 nzl = bi[i+1] - bi[i]; 3570 for (j=0; j<nzl; j++) { 3571 pv[j] = rtmp[pj[j]]; 3572 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3573 } 3574 3575 /* diagonal: invert diagonal entries for simplier triangular solves */ 3576 if (rtmp[i] == 0.0) rtmp[i] = dt+shift; 3577 b->a[bdiag[i]] = 1.0/rtmp[i]; 3578 /* printf(" (%d,%g),",i,b->a[bdiag[i]]); */ 3579 3580 /* U-part */ 3581 pv = b->a + bdiag[i+1] + 1; 3582 pj = bj + bdiag[i+1] + 1; 3583 nzu = bdiag[i] - bdiag[i+1] - 1; 3584 for (j=0; j<nzu; j++) { 3585 pv[j] = rtmp[pj[j]]; 3586 /* printf(" (%d,%g),",pj[j],pv[j]); */ 3587 } 3588 /* printf("\n"); */ 3589 } 3590 3591 ierr = PetscFree(rtmp);CHKERRQ(ierr); 3592 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 3593 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 3594 3595 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 3596 ierr = ISIdentity(isicol,&col_identity);CHKERRQ(ierr); 3597 if (row_identity && col_identity) { 3598 C->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 3599 } else { 3600 C->ops->solve = MatSolve_SeqAIJ; 3601 } 3602 C->ops->solveadd = 0; 3603 C->ops->solvetranspose = 0; 3604 C->ops->solvetransposeadd = 0; 3605 C->ops->matsolve = 0; 3606 C->assembled = PETSC_TRUE; 3607 C->preallocated = PETSC_TRUE; 3608 ierr = PetscLogFlops(C->cmap->n);CHKERRQ(ierr); 3609 PetscFunctionReturn(0); 3610 } 3611