1 /*$Id: aijfact.c,v 1.123 1999/09/27 21:29:41 bsmith Exp bsmith $*/ 2 3 #include "src/mat/impls/aij/seq/aij.h" 4 #include "src/vec/vecimpl.h" 5 #include "src/inline/dot.h" 6 7 #undef __FUNC__ 8 #define __FUNC__ "MatOrdering_Flow_SeqAIJ" 9 int MatOrdering_Flow_SeqAIJ(Mat mat,MatOrderingType type,IS *irow,IS *icol) 10 { 11 PetscFunctionBegin; 12 13 SETERRQ(PETSC_ERR_SUP,0,"Code not written"); 14 #if !defined(PETSC_USE_DEBUG) 15 PetscFunctionReturn(0); 16 #endif 17 } 18 19 /* 20 Factorization code for AIJ format. 21 */ 22 #undef __FUNC__ 23 #define __FUNC__ "MatLUFactorSymbolic_SeqAIJ" 24 int MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 25 { 26 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 27 IS isicol; 28 int *r,*ic, ierr, i, n = a->m, *ai = a->i, *aj = a->j; 29 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 30 int *idnew, idx, row,m,fm, nnz, nzi, realloc = 0,nzbd,*im; 31 32 PetscFunctionBegin; 33 PetscValidHeaderSpecific(isrow,IS_COOKIE); 34 PetscValidHeaderSpecific(iscol,IS_COOKIE); 35 36 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 37 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 38 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 39 40 /* get new row pointers */ 41 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 42 ainew[0] = -shift; 43 /* don't know how many column pointers are needed so estimate */ 44 jmax = (int) (f*ai[n]+(!shift)); 45 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 46 /* fill is a linked list of nonzeros in active row */ 47 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int));CHKPTRQ(fill); 48 im = fill + n + 1; 49 /* idnew is location of diagonal in factor */ 50 idnew = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(idnew); 51 idnew[0] = -shift; 52 53 for ( i=0; i<n; i++ ) { 54 /* first copy previous fill into linked list */ 55 nnz = nz = ai[r[i]+1] - ai[r[i]]; 56 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 57 ajtmp = aj + ai[r[i]] + shift; 58 fill[n] = n; 59 while (nz--) { 60 fm = n; 61 idx = ic[*ajtmp++ + shift]; 62 do { 63 m = fm; 64 fm = fill[m]; 65 } while (fm < idx); 66 fill[m] = idx; 67 fill[idx] = fm; 68 } 69 row = fill[n]; 70 while ( row < i ) { 71 ajtmp = ajnew + idnew[row] + (!shift); 72 nzbd = 1 + idnew[row] - ainew[row]; 73 nz = im[row] - nzbd; 74 fm = row; 75 while (nz-- > 0) { 76 idx = *ajtmp++ + shift; 77 nzbd++; 78 if (idx == i) im[row] = nzbd; 79 do { 80 m = fm; 81 fm = fill[m]; 82 } while (fm < idx); 83 if (fm != idx) { 84 fill[m] = idx; 85 fill[idx] = fm; 86 fm = idx; 87 nnz++; 88 } 89 } 90 row = fill[row]; 91 } 92 /* copy new filled row into permanent storage */ 93 ainew[i+1] = ainew[i] + nnz; 94 if (ainew[i+1] > jmax) { 95 96 /* estimate how much additional space we will need */ 97 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 98 /* just double the memory each time */ 99 int maxadd = jmax; 100 /* maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); */ 101 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 102 jmax += maxadd; 103 104 /* allocate a longer ajnew */ 105 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 106 ierr = PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int));CHKERRQ(ierr); 107 ierr = PetscFree(ajnew);CHKERRQ(ierr); 108 ajnew = ajtmp; 109 realloc++; /* count how many times we realloc */ 110 } 111 ajtmp = ajnew + ainew[i] + shift; 112 fm = fill[n]; 113 nzi = 0; 114 im[i] = nnz; 115 while (nnz--) { 116 if (fm < i) nzi++; 117 *ajtmp++ = fm - shift; 118 fm = fill[fm]; 119 } 120 idnew[i] = ainew[i] + nzi; 121 } 122 if (ai[n] != 0) { 123 double af = ((double)ainew[n])/((double)ai[n]); 124 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 125 realloc,f,af); 126 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:Run with -pc_lu_fill %g or use \n",af); 127 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:PCLUSetFill(pc,%g);\n",af); 128 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ:for best performance.\n"); 129 } else { 130 PLogInfo(A,"MatLUFactorSymbolic_SeqAIJ: Empty matrix\n"); 131 } 132 133 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 134 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 135 136 ierr = PetscFree(fill);CHKERRQ(ierr); 137 138 /* put together the new matrix */ 139 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,B);CHKERRQ(ierr); 140 PLogObjectParent(*B,isicol); 141 b = (Mat_SeqAIJ *) (*B)->data; 142 ierr = PetscFree(b->imax);CHKERRQ(ierr); 143 b->singlemalloc = 0; 144 /* the next line frees the default space generated by the Create() */ 145 ierr = PetscFree(b->a);CHKERRQ(ierr); 146 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 147 b->a = (Scalar *) PetscMalloc((ainew[n]+shift+1)*sizeof(Scalar));CHKPTRQ(b->a); 148 b->j = ajnew; 149 b->i = ainew; 150 b->diag = idnew; 151 b->ilen = 0; 152 b->imax = 0; 153 b->row = isrow; 154 b->col = iscol; 155 b->icol = isicol; 156 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 157 /* In b structure: Free imax, ilen, old a, old j. 158 Allocate idnew, solve_work, new a, new j */ 159 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 160 b->maxnz = b->nz = ainew[n] + shift; 161 162 (*B)->factor = FACTOR_LU;; 163 (*B)->info.factor_mallocs = realloc; 164 (*B)->info.fill_ratio_given = f; 165 (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant if A has inodes */ 166 167 if (ai[n] != 0) { 168 (*B)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[n]); 169 } else { 170 (*B)->info.fill_ratio_needed = 0.0; 171 } 172 PetscFunctionReturn(0); 173 } 174 /* ----------------------------------------------------------- */ 175 extern int Mat_AIJ_CheckInode(Mat); 176 177 #undef __FUNC__ 178 #define __FUNC__ "MatLUFactorNumeric_SeqAIJ" 179 int MatLUFactorNumeric_SeqAIJ(Mat A,Mat *B) 180 { 181 Mat C = *B; 182 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b = (Mat_SeqAIJ *)C->data; 183 IS isrow = b->row, isicol = b->icol; 184 int *r,*ic, ierr, i, j, n = a->m, *ai = b->i, *aj = b->j; 185 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 186 int *diag_offset = b->diag,diag,k; 187 int preserve_row_sums = (int) a->ilu_preserve_row_sums; 188 register int *pj; 189 Scalar *rtmp,*v, *pc, multiplier,sum,inner_sum,*rowsums = 0; 190 double ssum; 191 register Scalar *pv, *rtmps,*u_values; 192 193 PetscFunctionBegin; 194 195 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 196 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 197 rtmp = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar) );CHKPTRQ(rtmp); 198 ierr = PetscMemzero(rtmp,(n+1)*sizeof(Scalar));CHKERRQ(ierr); 199 rtmps = rtmp + shift; ics = ic + shift; 200 201 /* precalculate row sums */ 202 if (preserve_row_sums) { 203 rowsums = (Scalar *) PetscMalloc( n*sizeof(Scalar) );CHKPTRQ(rowsums); 204 for ( i=0; i<n; i++ ) { 205 nz = a->i[r[i]+1] - a->i[r[i]]; 206 v = a->a + a->i[r[i]] + shift; 207 sum = 0.0; 208 for ( j=0; j<nz; j++ ) sum += v[j]; 209 rowsums[i] = sum; 210 } 211 } 212 213 for ( i=0; i<n; i++ ) { 214 nz = ai[i+1] - ai[i]; 215 ajtmp = aj + ai[i] + shift; 216 for ( j=0; j<nz; j++ ) rtmps[ajtmp[j]] = 0.0; 217 218 /* load in initial (unfactored row) */ 219 nz = a->i[r[i]+1] - a->i[r[i]]; 220 ajtmpold = a->j + a->i[r[i]] + shift; 221 v = a->a + a->i[r[i]] + shift; 222 for ( j=0; j<nz; j++ ) rtmp[ics[ajtmpold[j]]] = v[j]; 223 224 row = *ajtmp++ + shift; 225 while (row < i ) { 226 pc = rtmp + row; 227 if (*pc != 0.0) { 228 pv = b->a + diag_offset[row] + shift; 229 pj = b->j + diag_offset[row] + (!shift); 230 multiplier = *pc / *pv++; 231 *pc = multiplier; 232 nz = ai[row+1] - diag_offset[row] - 1; 233 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 234 PLogFlops(2*nz); 235 } 236 row = *ajtmp++ + shift; 237 } 238 /* finished row so stick it into b->a */ 239 pv = b->a + ai[i] + shift; 240 pj = b->j + ai[i] + shift; 241 nz = ai[i+1] - ai[i]; 242 for ( j=0; j<nz; j++ ) {pv[j] = rtmps[pj[j]];} 243 diag = diag_offset[i] - ai[i]; 244 /* 245 Possibly adjust diagonal entry on current row to force 246 LU matrix to have same row sum as initial matrix. 247 */ 248 if (pv[diag] == 0.0) { 249 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,0,"Zero pivot row %d",i); 250 } 251 if (preserve_row_sums) { 252 pj = b->j + ai[i] + shift; 253 sum = rowsums[i]; 254 for ( j=0; j<diag; j++ ) { 255 u_values = b->a + diag_offset[pj[j]] + shift; 256 nz = ai[pj[j]+1] - diag_offset[pj[j]]; 257 inner_sum = 0.0; 258 for ( k=0; k<nz; k++ ) { 259 inner_sum += u_values[k]; 260 } 261 sum -= pv[j]*inner_sum; 262 263 } 264 nz = ai[i+1] - diag_offset[i] - 1; 265 u_values = b->a + diag_offset[i] + 1 + shift; 266 for ( k=0; k<nz; k++ ) { 267 sum -= u_values[k]; 268 } 269 ssum = PetscAbsScalar(sum/pv[diag]); 270 if (ssum < 1000. && ssum > .001) pv[diag] = sum; 271 } 272 /* check pivot entry for current row */ 273 } 274 275 /* invert diagonal entries for simplier triangular solves */ 276 for ( i=0; i<n; i++ ) { 277 b->a[diag_offset[i]+shift] = 1.0/b->a[diag_offset[i]+shift]; 278 } 279 280 if (preserve_row_sums) {ierr = PetscFree(rowsums);CHKERRQ(ierr);} 281 ierr = PetscFree(rtmp);CHKERRQ(ierr); 282 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 283 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 284 C->factor = FACTOR_LU; 285 ierr = Mat_AIJ_CheckInode(C);CHKERRQ(ierr); 286 C->assembled = PETSC_TRUE; 287 PLogFlops(b->n); 288 PetscFunctionReturn(0); 289 } 290 /* ----------------------------------------------------------- */ 291 #undef __FUNC__ 292 #define __FUNC__ "MatLUFactor_SeqAIJ" 293 int MatLUFactor_SeqAIJ(Mat A,IS row,IS col,double f) 294 { 295 Mat_SeqAIJ *mat = (Mat_SeqAIJ *) A->data; 296 int ierr; 297 Mat C; 298 PetscOps *Abops; 299 MatOps Aops; 300 301 PetscFunctionBegin; 302 ierr = MatLUFactorSymbolic(A,row,col,f,&C);CHKERRQ(ierr); 303 ierr = MatLUFactorNumeric(A,&C);CHKERRQ(ierr); 304 305 /* free all the data structures from mat */ 306 ierr = PetscFree(mat->a);CHKERRQ(ierr); 307 if (!mat->singlemalloc) { 308 ierr = PetscFree(mat->i);CHKERRQ(ierr); 309 ierr = PetscFree(mat->j);CHKERRQ(ierr); 310 } 311 if (mat->diag) {ierr = PetscFree(mat->diag);CHKERRQ(ierr);} 312 if (mat->ilen) {ierr = PetscFree(mat->ilen);CHKERRQ(ierr);} 313 if (mat->imax) {ierr = PetscFree(mat->imax);CHKERRQ(ierr);} 314 if (mat->solve_work) {ierr = PetscFree(mat->solve_work);CHKERRQ(ierr);} 315 if (mat->inode.size) {ierr = PetscFree(mat->inode.size);CHKERRQ(ierr);} 316 if (mat->icol) {ierr = ISDestroy(mat->icol);CHKERRQ(ierr);} 317 ierr = PetscFree(mat);CHKERRQ(ierr); 318 319 ierr = MapDestroy(A->rmap);CHKERRQ(ierr); 320 ierr = MapDestroy(A->cmap);CHKERRQ(ierr); 321 322 /* 323 This is horrible, horrible code. We need to keep the 324 A pointers for the bops and ops but copy everything 325 else from C. 326 */ 327 Abops = A->bops; 328 Aops = A->ops; 329 ierr = PetscMemcpy(A,C,sizeof(struct _p_Mat));CHKERRQ(ierr); 330 mat = (Mat_SeqAIJ *) A->data; 331 PLogObjectParent(A,mat->icol); 332 333 A->bops = Abops; 334 A->ops = Aops; 335 A->qlist = 0; 336 337 PetscHeaderDestroy(C); 338 PetscFunctionReturn(0); 339 } 340 /* ----------------------------------------------------------- */ 341 #undef __FUNC__ 342 #define __FUNC__ "MatSolve_SeqAIJ" 343 int MatSolve_SeqAIJ(Mat A,Vec bb, Vec xx) 344 { 345 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 346 IS iscol = a->col, isrow = a->row; 347 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 348 int nz,shift = a->indexshift,*rout,*cout; 349 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 350 351 PetscFunctionBegin; 352 if (!n) PetscFunctionReturn(0); 353 354 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 356 tmp = a->solve_work; 357 358 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 359 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 360 361 /* forward solve the lower triangular */ 362 tmp[0] = b[*r++]; 363 tmps = tmp + shift; 364 for ( i=1; i<n; i++ ) { 365 v = aa + ai[i] + shift; 366 vi = aj + ai[i] + shift; 367 nz = a->diag[i] - ai[i]; 368 sum = b[*r++]; 369 while (nz--) sum -= *v++ * tmps[*vi++]; 370 tmp[i] = sum; 371 } 372 373 /* backward solve the upper triangular */ 374 for ( i=n-1; i>=0; i-- ){ 375 v = aa + a->diag[i] + (!shift); 376 vi = aj + a->diag[i] + (!shift); 377 nz = ai[i+1] - a->diag[i] - 1; 378 sum = tmp[i]; 379 while (nz--) sum -= *v++ * tmps[*vi++]; 380 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 381 } 382 383 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 384 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 385 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 386 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 387 PLogFlops(2*a->nz - a->n); 388 PetscFunctionReturn(0); 389 } 390 391 /* ----------------------------------------------------------- */ 392 #undef __FUNC__ 393 #define __FUNC__ "MatSolve_SeqAIJ_NaturalOrdering" 394 int MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb, Vec xx) 395 { 396 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 397 int n = a->m, *ai = a->i, *aj = a->j, *adiag = a->diag,ierr; 398 Scalar *x,*b, *aa = a->a, sum; 399 #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 400 int adiag_i,i,*vi,nz,ai_i; 401 Scalar *v; 402 #endif 403 404 PetscFunctionBegin; 405 if (!n) PetscFunctionReturn(0); 406 if (a->indexshift) { 407 ierr = MatSolve_SeqAIJ(A,bb,xx);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 412 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 413 414 #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ) 415 fortransolveaij_(&n,x,ai,aj,adiag,aa,b); 416 #else 417 /* forward solve the lower triangular */ 418 x[0] = b[0]; 419 for ( i=1; i<n; i++ ) { 420 ai_i = ai[i]; 421 v = aa + ai_i; 422 vi = aj + ai_i; 423 nz = adiag[i] - ai_i; 424 sum = b[i]; 425 while (nz--) sum -= *v++ * x[*vi++]; 426 x[i] = sum; 427 } 428 429 /* backward solve the upper triangular */ 430 for ( i=n-1; i>=0; i-- ){ 431 adiag_i = adiag[i]; 432 v = aa + adiag_i + 1; 433 vi = aj + adiag_i + 1; 434 nz = ai[i+1] - adiag_i - 1; 435 sum = x[i]; 436 while (nz--) sum -= *v++ * x[*vi++]; 437 x[i] = sum*aa[adiag_i]; 438 } 439 #endif 440 PLogFlops(2*a->nz - a->n); 441 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 442 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 443 PetscFunctionReturn(0); 444 } 445 446 #undef __FUNC__ 447 #define __FUNC__ "MatSolveAdd_SeqAIJ" 448 int MatSolveAdd_SeqAIJ(Mat A,Vec bb, Vec yy, Vec xx) 449 { 450 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 451 IS iscol = a->col, isrow = a->row; 452 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 453 int nz, shift = a->indexshift,*rout,*cout; 454 Scalar *x,*b,*tmp, *aa = a->a, sum, *v; 455 456 PetscFunctionBegin; 457 if (yy != xx) {ierr = VecCopy(yy,xx);CHKERRQ(ierr);} 458 459 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 460 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 461 tmp = a->solve_work; 462 463 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 464 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout + (n-1); 465 466 /* forward solve the lower triangular */ 467 tmp[0] = b[*r++]; 468 for ( i=1; i<n; i++ ) { 469 v = aa + ai[i] + shift; 470 vi = aj + ai[i] + shift; 471 nz = a->diag[i] - ai[i]; 472 sum = b[*r++]; 473 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 474 tmp[i] = sum; 475 } 476 477 /* backward solve the upper triangular */ 478 for ( i=n-1; i>=0; i-- ){ 479 v = aa + a->diag[i] + (!shift); 480 vi = aj + a->diag[i] + (!shift); 481 nz = ai[i+1] - a->diag[i] - 1; 482 sum = tmp[i]; 483 while (nz--) sum -= *v++ * tmp[*vi++ + shift]; 484 tmp[i] = sum*aa[a->diag[i]+shift]; 485 x[*c--] += tmp[i]; 486 } 487 488 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 489 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 490 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 491 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 492 PLogFlops(2*a->nz); 493 494 PetscFunctionReturn(0); 495 } 496 /* -------------------------------------------------------------------*/ 497 #undef __FUNC__ 498 #define __FUNC__ "MatSolveTrans_SeqAIJ" 499 int MatSolveTrans_SeqAIJ(Mat A,Vec bb, Vec xx) 500 { 501 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 502 IS iscol = a->col, isrow = a->row; 503 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 504 int nz,shift = a->indexshift,*rout,*cout; 505 Scalar *x,*b,*tmp, *aa = a->a, *v; 506 507 PetscFunctionBegin; 508 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 509 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 510 tmp = a->solve_work; 511 512 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 513 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 514 515 /* copy the b into temp work space according to permutation */ 516 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 517 518 /* forward solve the U^T */ 519 for ( i=0; i<n; i++ ) { 520 v = aa + a->diag[i] + shift; 521 vi = aj + a->diag[i] + (!shift); 522 nz = ai[i+1] - a->diag[i] - 1; 523 tmp[i] *= *v++; 524 while (nz--) { 525 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 526 } 527 } 528 529 /* backward solve the L^T */ 530 for ( i=n-1; i>=0; i-- ){ 531 v = aa + a->diag[i] - 1 + shift; 532 vi = aj + a->diag[i] - 1 + shift; 533 nz = a->diag[i] - ai[i]; 534 while (nz--) { 535 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 536 } 537 } 538 539 /* copy tmp into x according to permutation */ 540 for ( i=0; i<n; i++ ) x[r[i]] = tmp[i]; 541 542 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 543 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 544 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 545 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 546 547 PLogFlops(2*a->nz-a->n); 548 PetscFunctionReturn(0); 549 } 550 551 #undef __FUNC__ 552 #define __FUNC__ "MatSolveTransAdd_SeqAIJ" 553 int MatSolveTransAdd_SeqAIJ(Mat A,Vec bb, Vec zz,Vec xx) 554 { 555 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 556 IS iscol = a->col, isrow = a->row; 557 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 558 int nz,shift = a->indexshift, *rout, *cout; 559 Scalar *x,*b,*tmp, *aa = a->a, *v; 560 561 PetscFunctionBegin; 562 if (zz != xx) VecCopy(zz,xx); 563 564 ierr = VecGetArray(bb,&b);CHKERRQ(ierr); 565 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 566 tmp = a->solve_work; 567 568 ierr = ISGetIndices(isrow,&rout);CHKERRQ(ierr); r = rout; 569 ierr = ISGetIndices(iscol,&cout);CHKERRQ(ierr); c = cout; 570 571 /* copy the b into temp work space according to permutation */ 572 for ( i=0; i<n; i++ ) tmp[i] = b[c[i]]; 573 574 /* forward solve the U^T */ 575 for ( i=0; i<n; i++ ) { 576 v = aa + a->diag[i] + shift; 577 vi = aj + a->diag[i] + (!shift); 578 nz = ai[i+1] - a->diag[i] - 1; 579 tmp[i] *= *v++; 580 while (nz--) { 581 tmp[*vi++ + shift] -= (*v++)*tmp[i]; 582 } 583 } 584 585 /* backward solve the L^T */ 586 for ( i=n-1; i>=0; i-- ){ 587 v = aa + a->diag[i] - 1 + shift; 588 vi = aj + a->diag[i] - 1 + shift; 589 nz = a->diag[i] - ai[i]; 590 while (nz--) { 591 tmp[*vi-- + shift] -= (*v--)*tmp[i]; 592 } 593 } 594 595 /* copy tmp into x according to permutation */ 596 for ( i=0; i<n; i++ ) x[r[i]] += tmp[i]; 597 598 ierr = ISRestoreIndices(isrow,&rout);CHKERRQ(ierr); 599 ierr = ISRestoreIndices(iscol,&cout);CHKERRQ(ierr); 600 ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr); 601 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 602 603 PLogFlops(2*a->nz); 604 PetscFunctionReturn(0); 605 } 606 /* ----------------------------------------------------------------*/ 607 extern int MatMissingDiag_SeqAIJ(Mat); 608 609 #undef __FUNC__ 610 #define __FUNC__ "MatILUFactorSymbolic_SeqAIJ" 611 int MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatILUInfo *info,Mat *fact) 612 { 613 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data, *b; 614 IS isicol; 615 int *r,*ic, ierr, prow, n = a->m, *ai = a->i, *aj = a->j; 616 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 617 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0, dcount = 0; 618 int incrlev,nnz,i,shift = a->indexshift,levels,diagonal_fill; 619 PetscTruth col_identity, row_identity; 620 double f; 621 622 PetscFunctionBegin; 623 if (info) { 624 f = info->fill; 625 levels = (int) info->levels; 626 diagonal_fill = (int) info->diagonal_fill; 627 } else { 628 f = 1.0; 629 levels = 0; 630 diagonal_fill = 0; 631 } 632 ierr = ISInvertPermutation(iscol,&isicol);CHKERRQ(ierr); 633 634 /* special case that simply copies fill pattern */ 635 ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr); 636 ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr); 637 if (levels == 0 && row_identity && col_identity) { 638 ierr = MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);CHKERRQ(ierr); 639 (*fact)->factor = FACTOR_LU; 640 b = (Mat_SeqAIJ *) (*fact)->data; 641 if (!b->diag) { 642 ierr = MatMarkDiag_SeqAIJ(*fact);CHKERRQ(ierr); 643 } 644 ierr = MatMissingDiag_SeqAIJ(*fact);CHKERRQ(ierr); 645 b->row = isrow; 646 b->col = iscol; 647 b->icol = isicol; 648 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 649 (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering; 650 PetscFunctionReturn(0); 651 } 652 653 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 654 ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr); 655 656 /* get new row pointers */ 657 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) );CHKPTRQ(ainew); 658 ainew[0] = -shift; 659 /* don't know how many column pointers are needed so estimate */ 660 jmax = (int) (f*(ai[n]+!shift)); 661 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajnew); 662 /* ajfill is level of fill for each fill entry */ 663 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) );CHKPTRQ(ajfill); 664 /* fill is a linked list of nonzeros in active row */ 665 fill = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(fill); 666 /* im is level for each filled value */ 667 im = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(im); 668 /* dloc is location of diagonal in factor */ 669 dloc = (int *) PetscMalloc( (n+1)*sizeof(int));CHKPTRQ(dloc); 670 dloc[0] = 0; 671 for ( prow=0; prow<n; prow++ ) { 672 673 /* copy current row into linked list */ 674 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 675 if (!nz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,1,"Empty row in matrix"); 676 xi = aj + ai[r[prow]] + shift; 677 fill[n] = n; 678 fill[prow] = -1; /* marker to indicate if diagonal exists */ 679 while (nz--) { 680 fm = n; 681 idx = ic[*xi++ + shift]; 682 do { 683 m = fm; 684 fm = fill[m]; 685 } while (fm < idx); 686 fill[m] = idx; 687 fill[idx] = fm; 688 im[idx] = 0; 689 } 690 691 /* make sure diagonal entry is included */ 692 if (diagonal_fill && fill[prow] == -1) { 693 fm = n; 694 while (fill[fm] < prow) fm = fill[fm]; 695 fill[prow] = fill[fm]; /* insert diagonal into linked list */ 696 fill[fm] = prow; 697 im[prow] = 0; 698 nzf++; 699 dcount++; 700 } 701 702 nzi = 0; 703 row = fill[n]; 704 while ( row < prow ) { 705 incrlev = im[row] + 1; 706 nz = dloc[row]; 707 xi = ajnew + ainew[row] + shift + nz + 1; 708 flev = ajfill + ainew[row] + shift + nz + 1; 709 nnz = ainew[row+1] - ainew[row] - nz - 1; 710 fm = row; 711 while (nnz-- > 0) { 712 idx = *xi++ + shift; 713 if (*flev + incrlev > levels) { 714 flev++; 715 continue; 716 } 717 do { 718 m = fm; 719 fm = fill[m]; 720 } while (fm < idx); 721 if (fm != idx) { 722 im[idx] = *flev + incrlev; 723 fill[m] = idx; 724 fill[idx] = fm; 725 fm = idx; 726 nzf++; 727 } else { 728 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 729 } 730 flev++; 731 } 732 row = fill[row]; 733 nzi++; 734 } 735 /* copy new filled row into permanent storage */ 736 ainew[prow+1] = ainew[prow] + nzf; 737 if (ainew[prow+1] > jmax-shift) { 738 739 /* estimate how much additional space we will need */ 740 /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */ 741 /* just double the memory each time */ 742 /* maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); */ 743 int maxadd = jmax; 744 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 745 jmax += maxadd; 746 747 /* allocate a longer ajnew and ajfill */ 748 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 749 ierr = PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 750 ierr = PetscFree(ajnew);CHKERRQ(ierr); 751 ajnew = xi; 752 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 753 ierr = PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int));CHKERRQ(ierr); 754 ierr = PetscFree(ajfill);CHKERRQ(ierr); 755 ajfill = xi; 756 realloc++; /* count how many times we realloc */ 757 } 758 xi = ajnew + ainew[prow] + shift; 759 flev = ajfill + ainew[prow] + shift; 760 dloc[prow] = nzi; 761 fm = fill[n]; 762 while (nzf--) { 763 *xi++ = fm - shift; 764 *flev++ = im[fm]; 765 fm = fill[fm]; 766 } 767 /* make sure row has diagonal entry */ 768 if (ajnew[ainew[prow]+shift+dloc[prow]]+shift != prow) { 769 SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,1,"Row %d has missing diagonal in factored matrix\n\ 770 try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",prow); 771 } 772 } 773 ierr = PetscFree(ajfill); CHKERRQ(ierr); 774 ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr); 775 ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr); 776 ierr = PetscFree(fill);CHKERRQ(ierr); 777 ierr = PetscFree(im);CHKERRQ(ierr); 778 779 { 780 double af = ((double)ainew[n])/((double)ai[n]); 781 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 782 realloc,f,af); 783 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Run with -pc_ilu_fill %g or use \n",af); 784 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:PCILUSetFill(pc,%g);\n",af); 785 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:for best performance.\n"); 786 if (diagonal_fill) { 787 PLogInfo(A,"MatILUFactorSymbolic_SeqAIJ:Detected and replace %d missing diagonals",dcount); 788 } 789 } 790 791 /* put together the new matrix */ 792 ierr = MatCreateSeqAIJ(A->comm,n,n,0,PETSC_NULL,fact);CHKERRQ(ierr); 793 PLogObjectParent(*fact,isicol); 794 b = (Mat_SeqAIJ *) (*fact)->data; 795 ierr = PetscFree(b->imax);CHKERRQ(ierr); 796 b->singlemalloc = 0; 797 len = (ainew[n] + shift)*sizeof(Scalar); 798 /* the next line frees the default space generated by the Create() */ 799 ierr = PetscFree(b->a);CHKERRQ(ierr); 800 ierr = PetscFree(b->ilen);CHKERRQ(ierr); 801 b->a = (Scalar *) PetscMalloc( len+1 );CHKPTRQ(b->a); 802 b->j = ajnew; 803 b->i = ainew; 804 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 805 b->diag = dloc; 806 b->ilen = 0; 807 b->imax = 0; 808 b->row = isrow; 809 b->col = iscol; 810 b->icol = isicol; 811 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 812 /* In b structure: Free imax, ilen, old a, old j. 813 Allocate dloc, solve_work, new a, new j */ 814 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 815 b->maxnz = b->nz = ainew[n] + shift; 816 (*fact)->factor = FACTOR_LU; 817 818 (*fact)->info.factor_mallocs = realloc; 819 (*fact)->info.fill_ratio_given = f; 820 (*fact)->info.fill_ratio_needed = ((double)ainew[n])/((double)ai[prow]); 821 (*fact)->factor = FACTOR_LU;; 822 823 PetscFunctionReturn(0); 824 } 825 826 827 828 829