1 #ifndef lint 2 static char vcid[] = "$Id: baijfact.c,v 1.1 1996/02/13 22:33:11 bsmith Exp bsmith $"; 3 #endif 4 /* 5 Factorization code for BAIJ format. 6 */ 7 8 #include "baij.h" 9 10 /* 11 The symbolic factorization code is identical to that for AIJ format, 12 except for very small changes since this is now a SeqBAIJ datastructure. 13 NOT good code reuse. 14 */ 15 int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 16 { 17 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 18 IS isicol; 19 int *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j; 20 int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 21 int *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im; 22 int bs = a->bs; 23 24 if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square"); 25 if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation"); 26 if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation"); 27 28 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 29 ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 30 31 /* get new row pointers */ 32 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 33 ainew[0] = -shift; 34 /* don't know how many column pointers are needed so estimate */ 35 jmax = (int) (f*ai[n]+(!shift)); 36 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 37 /* fill is a linked list of nonzeros in active row */ 38 fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill); 39 im = fill + n + 1; 40 /* idnew is location of diagonal in factor */ 41 idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew); 42 idnew[0] = -shift; 43 44 for ( i=0; i<n; i++ ) { 45 /* first copy previous fill into linked list */ 46 nnz = nz = ai[r[i]+1] - ai[r[i]]; 47 ajtmp = aj + ai[r[i]] + shift; 48 fill[n] = n; 49 while (nz--) { 50 fm = n; 51 idx = ic[*ajtmp++ + shift]; 52 do { 53 m = fm; 54 fm = fill[m]; 55 } while (fm < idx); 56 fill[m] = idx; 57 fill[idx] = fm; 58 } 59 row = fill[n]; 60 while ( row < i ) { 61 ajtmp = ajnew + idnew[row] + (!shift); 62 nzbd = 1 + idnew[row] - ainew[row]; 63 nz = im[row] - nzbd; 64 fm = row; 65 while (nz-- > 0) { 66 idx = *ajtmp++ + shift; 67 nzbd++; 68 if (idx == i) im[row] = nzbd; 69 do { 70 m = fm; 71 fm = fill[m]; 72 } while (fm < idx); 73 if (fm != idx) { 74 fill[m] = idx; 75 fill[idx] = fm; 76 fm = idx; 77 nnz++; 78 } 79 } 80 row = fill[row]; 81 } 82 /* copy new filled row into permanent storage */ 83 ainew[i+1] = ainew[i] + nnz; 84 if (ainew[i+1] > jmax+1) { 85 /* allocate a longer ajnew */ 86 int maxadd; 87 maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); 88 if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 89 jmax += maxadd; 90 ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 91 PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int)); 92 PetscFree(ajnew); 93 ajnew = ajtmp; 94 realloc++; /* count how many times we realloc */ 95 } 96 ajtmp = ajnew + ainew[i] + shift; 97 fm = fill[n]; 98 nzi = 0; 99 im[i] = nnz; 100 while (nnz--) { 101 if (fm < i) nzi++; 102 *ajtmp++ = fm - shift; 103 fm = fill[fm]; 104 } 105 idnew[i] = ainew[i] + nzi; 106 } 107 108 PLogInfo((PetscObject)A, 109 "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 110 realloc,f,((double)ainew[n])/((double)ai[i])); 111 112 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 113 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 114 115 PetscFree(fill); 116 117 /* put together the new matrix */ 118 ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr); 119 PLogObjectParent(*B,isicol); 120 ierr = ISDestroy(isicol); CHKERRQ(ierr); 121 b = (Mat_SeqBAIJ *) (*B)->data; 122 PetscFree(b->imax); 123 b->singlemalloc = 0; 124 len = (ainew[n] + shift)*sizeof(Scalar); 125 /* the next line frees the default space generated by the Create() */ 126 PetscFree(b->a); PetscFree(b->ilen); 127 b->a = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a); 128 b->j = ajnew; 129 b->i = ainew; 130 b->diag = idnew; 131 b->ilen = 0; 132 b->imax = 0; 133 b->row = isrow; 134 b->col = iscol; 135 b->solve_work = (Scalar *) PetscMalloc( n*sizeof(Scalar)); 136 CHKPTRQ(b->solve_work); 137 /* In b structure: Free imax, ilen, old a, old j. 138 Allocate idnew, solve_work, new a, new j */ 139 PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 140 b->maxnz = b->nz = ainew[n] + shift; 141 142 return 0; 143 } 144 145 #include "pinclude/plapack.h" 146 147 #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));} 148 149 #define BlockCopy(v_in,v_out,idx_in,idx_out) \ 150 {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));} 151 152 #define BlockInvert(vv,idx) \ 153 { int _i,info; Scalar *w = vv + bs2*idx; \ 154 printf("vv idx %g %d \n",*vv,idx); \ 155 LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \ 156 printf("bs %d w %g\n",bs,*w);\ 157 PetscMemzero(v_work,bs2*sizeof(Scalar)); \ 158 printf("vwork %g\n",*v_work);\ 159 for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \ 160 printf("vwork %g\n",*v_work);\ 161 LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\ 162 printf("w %g vwork %g\n",*w,*v_work);\ 163 PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \ 164 printf("w %g vwork %g\n",*w,*v_work);\ 165 } 166 167 #define BlockMult(v1,v2,v3 168 /* ----------------------------------------------------------- */ 169 int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B) 170 { 171 Mat C = *B; 172 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data; 173 IS iscol = b->col, isrow = b->row, isicol; 174 int *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j; 175 int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 176 int *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots; 177 Scalar *rtmp,*v, *pc, multiplier,*v_work; 178 /* These declarations are for optimizations. They reduce the number of 179 memory references that are made by locally storing information; the 180 word "register" used here with pointers can be viewed as "private" or 181 "known only to me" 182 */ 183 register Scalar *pv, *rtmps; 184 register int *pj; 185 186 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 187 PLogObjectParent(*B,isicol); 188 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 189 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 190 rtmp = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 191 rtmps = rtmp + shift; ics = ic + shift; 192 193 /* generate work space needed by dense LU factorization */ 194 v_work = (Scalar *) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(v_work); 195 v_pivots = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(v_pivots); 196 197 for ( i=0; i<n; i++ ) { 198 nz = ai[i+1] - ai[i]; 199 ajtmp = aj + ai[i] + shift; 200 for ( j=0; j<nz; j++ ) BlockZero(rtmps,ajtmp[j]); 201 malloc_verify(); 202 /* load in initial (unfactored row) */ 203 nz = a->i[r[i]+1] - a->i[r[i]]; 204 ajtmpold = a->j + a->i[r[i]] + shift; 205 v = a->a + bs2*a->i[r[i]] + shift; 206 for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ics[ajtmpold[j]]); 207 malloc_verify(); 208 209 row = *ajtmp++ + shift; 210 while (row < i) { 211 pc = rtmp + row; 212 if (*pc != 0.0) { 213 pv = b->a + bs2*diag_offset[row] + shift; 214 pj = b->j + diag_offset[row] + (!shift); 215 multiplier = *pc * (*pv); 216 *pc = multiplier; 217 nz = ai[row+1] - diag_offset[row] - 1; 218 for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 219 pv += bs2; 220 PLogFlops(2*nz); 221 } 222 row = *ajtmp++ + shift; 223 } 224 /* finished row so stick it into b->a */ 225 pv = b->a + bs2*ai[i] + shift; 226 pj = b->j + ai[i] + shift; 227 nz = ai[i+1] - ai[i]; 228 for ( j=0; j<nz; j++ ) BlockCopy(rtmps,pv,pj[j],j); 229 malloc_verify(); 230 diag = diag_offset[i] - ai[i]; 231 /* invert diagonal block */ 232 BlockInvert(pv,diag); 233 malloc_verify(); 234 } 235 236 PetscFree(rtmp); 237 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 238 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 239 ierr = ISDestroy(isicol); CHKERRQ(ierr); 240 C->factor = FACTOR_LU; 241 C->assembled = PETSC_TRUE; 242 PLogFlops(b->n); 243 return 0; 244 } 245 /* ----------------------------------------------------------- */ 246 int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f) 247 { 248 Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data; 249 int ierr; 250 Mat C; 251 252 ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr); 253 ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr); 254 255 /* free all the data structures from mat */ 256 PetscFree(mat->a); 257 if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);} 258 if (mat->diag) PetscFree(mat->diag); 259 if (mat->ilen) PetscFree(mat->ilen); 260 if (mat->imax) PetscFree(mat->imax); 261 if (mat->solve_work) PetscFree(mat->solve_work); 262 PetscFree(mat); 263 264 PetscMemcpy(A,C,sizeof(struct _Mat)); 265 PetscHeaderDestroy(C); 266 return 0; 267 } 268 /* ----------------------------------------------------------- */ 269 int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx) 270 { 271 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 272 IS iscol = a->col, isrow = a->row; 273 int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 274 int nz,shift = a->indexshift; 275 Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 276 277 if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix"); 278 279 ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 280 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 281 tmp = a->solve_work; 282 283 ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 284 ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1); 285 286 /* forward solve the lower triangular */ 287 tmp[0] = b[*r++]; 288 tmps = tmp + shift; 289 for ( i=1; i<n; i++ ) { 290 v = aa + ai[i] + shift; 291 vi = aj + ai[i] + shift; 292 nz = a->diag[i] - ai[i]; 293 sum = b[*r++]; 294 while (nz--) sum -= *v++ * tmps[*vi++]; 295 tmp[i] = sum; 296 } 297 298 /* backward solve the upper triangular */ 299 for ( i=n-1; i>=0; i-- ){ 300 v = aa + a->diag[i] + (!shift); 301 vi = aj + a->diag[i] + (!shift); 302 nz = ai[i+1] - a->diag[i] - 1; 303 sum = tmp[i]; 304 while (nz--) sum -= *v++ * tmps[*vi++]; 305 x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 306 } 307 308 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 309 ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 310 PLogFlops(2*a->nz - a->n); 311 return 0; 312 } 313 314 /* ----------------------------------------------------------------*/ 315 /* 316 This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 317 except that the data structure of Mat_SeqAIJ is slightly different. 318 Not a good example of code reuse. 319 */ 320 int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels, 321 Mat *fact) 322 { 323 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 324 IS isicol; 325 int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 326 int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 327 int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0; 328 int incrlev,nnz,i,shift = a->indexshift,bs = a->bs; 329 330 if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square"); 331 if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation"); 332 if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation"); 333 334 /* special case that simply copies fill pattern */ 335 if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) { 336 ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr); 337 (*fact)->factor = FACTOR_LU; 338 b = (Mat_SeqBAIJ *) (*fact)->data; 339 if (!b->diag) { 340 ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 341 } 342 b->row = isrow; 343 b->col = iscol; 344 b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 345 return 0; 346 } 347 348 ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 349 ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 350 ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 351 352 /* get new row pointers */ 353 ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 354 ainew[0] = -shift; 355 /* don't know how many column pointers are needed so estimate */ 356 jmax = (int) (f*(ai[n]+!shift)); 357 ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 358 /* ajfill is level of fill for each fill entry */ 359 ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 360 /* fill is a linked list of nonzeros in active row */ 361 fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 362 /* im is level for each filled value */ 363 im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 364 /* dloc is location of diagonal in factor */ 365 dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 366 dloc[0] = 0; 367 for ( prow=0; prow<n; prow++ ) { 368 /* first copy previous fill into linked list */ 369 nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 370 xi = aj + ai[r[prow]] + shift; 371 fill[n] = n; 372 while (nz--) { 373 fm = n; 374 idx = ic[*xi++ + shift]; 375 do { 376 m = fm; 377 fm = fill[m]; 378 } while (fm < idx); 379 fill[m] = idx; 380 fill[idx] = fm; 381 im[idx] = 0; 382 } 383 nzi = 0; 384 row = fill[n]; 385 while ( row < prow ) { 386 incrlev = im[row] + 1; 387 nz = dloc[row]; 388 xi = ajnew + ainew[row] + shift + nz; 389 flev = ajfill + ainew[row] + shift + nz + 1; 390 nnz = ainew[row+1] - ainew[row] - nz - 1; 391 if (*xi++ + shift != row) { 392 SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot"); 393 } 394 fm = row; 395 while (nnz-- > 0) { 396 idx = *xi++ + shift; 397 if (*flev + incrlev > levels) { 398 flev++; 399 continue; 400 } 401 do { 402 m = fm; 403 fm = fill[m]; 404 } while (fm < idx); 405 if (fm != idx) { 406 im[idx] = *flev + incrlev; 407 fill[m] = idx; 408 fill[idx] = fm; 409 fm = idx; 410 nzf++; 411 } 412 else { 413 if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 414 } 415 flev++; 416 } 417 row = fill[row]; 418 nzi++; 419 } 420 /* copy new filled row into permanent storage */ 421 ainew[prow+1] = ainew[prow] + nzf; 422 if (ainew[prow+1] > jmax-shift) { 423 /* allocate a longer ajnew */ 424 int maxadd; 425 maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); 426 if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 427 jmax += maxadd; 428 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 429 PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int)); 430 PetscFree(ajnew); 431 ajnew = xi; 432 /* allocate a longer ajfill */ 433 xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 434 PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int)); 435 PetscFree(ajfill); 436 ajfill = xi; 437 realloc++; 438 } 439 xi = ajnew + ainew[prow] + shift; 440 flev = ajfill + ainew[prow] + shift; 441 dloc[prow] = nzi; 442 fm = fill[n]; 443 while (nzf--) { 444 *xi++ = fm - shift; 445 *flev++ = im[fm]; 446 fm = fill[fm]; 447 } 448 } 449 PetscFree(ajfill); 450 ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 451 ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 452 ierr = ISDestroy(isicol); CHKERRQ(ierr); 453 PetscFree(fill); PetscFree(im); 454 455 PLogInfo((PetscObject)A, 456 "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n", 457 realloc,f,((double)ainew[n])/((double)ai[prow])); 458 459 /* put together the new matrix */ 460 ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 461 b = (Mat_SeqBAIJ *) (*fact)->data; 462 PetscFree(b->imax); 463 b->singlemalloc = 0; 464 len = (ainew[n] + shift)*sizeof(Scalar); 465 /* the next line frees the default space generated by the Create() */ 466 PetscFree(b->a); PetscFree(b->ilen); 467 b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 468 b->j = ajnew; 469 b->i = ainew; 470 for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 471 b->diag = dloc; 472 b->ilen = 0; 473 b->imax = 0; 474 b->row = isrow; 475 b->col = iscol; 476 b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar)); 477 CHKPTRQ(b->solve_work); 478 /* In b structure: Free imax, ilen, old a, old j. 479 Allocate dloc, solve_work, new a, new j */ 480 PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 481 b->maxnz = b->nz = ainew[n] + shift; 482 (*fact)->factor = FACTOR_LU; 483 return 0; 484 } 485 486 487 488 489