15d517e7eSBarry Smith #ifndef lint 2*de6a44a3SBarry Smith static char vcid[] = "$Id: baijfact.c,v 1.2 1996/02/15 04:38:54 bsmith Exp bsmith $"; 35d517e7eSBarry Smith #endif 45d517e7eSBarry Smith /* 5ec3a8b7bSBarry Smith Factorization code for BAIJ format. 65d517e7eSBarry Smith */ 75d517e7eSBarry Smith 8ec3a8b7bSBarry Smith #include "baij.h" 9ec3a8b7bSBarry Smith 10ec3a8b7bSBarry Smith /* 11ec3a8b7bSBarry Smith The symbolic factorization code is identical to that for AIJ format, 12ec3a8b7bSBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 13ec3a8b7bSBarry Smith NOT good code reuse. 14ec3a8b7bSBarry Smith */ 15ec3a8b7bSBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 165d517e7eSBarry Smith { 17ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 185d517e7eSBarry Smith IS isicol; 19ec3a8b7bSBarry Smith int *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j; 20*de6a44a3SBarry Smith int *ainew,*ajnew, jmax,*fill, *ajtmp, nz, bs = a->bs; 215d517e7eSBarry Smith int *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im; 225d517e7eSBarry Smith 23ec3a8b7bSBarry Smith if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square"); 24ec3a8b7bSBarry Smith if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation"); 25ec3a8b7bSBarry Smith if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation"); 265d517e7eSBarry Smith 275d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 285d517e7eSBarry Smith ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 295d517e7eSBarry Smith 305d517e7eSBarry Smith /* get new row pointers */ 315d517e7eSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 32*de6a44a3SBarry Smith ainew[0] = 0; 335d517e7eSBarry Smith /* don't know how many column pointers are needed so estimate */ 34*de6a44a3SBarry Smith jmax = (int) (f*ai[n] + 1); 355d517e7eSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 365d517e7eSBarry Smith /* fill is a linked list of nonzeros in active row */ 375d517e7eSBarry Smith fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill); 385d517e7eSBarry Smith im = fill + n + 1; 395d517e7eSBarry Smith /* idnew is location of diagonal in factor */ 405d517e7eSBarry Smith idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew); 41*de6a44a3SBarry Smith idnew[0] = 0; 425d517e7eSBarry Smith 435d517e7eSBarry Smith for ( i=0; i<n; i++ ) { 445d517e7eSBarry Smith /* first copy previous fill into linked list */ 455d517e7eSBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 46*de6a44a3SBarry Smith ajtmp = aj + ai[r[i]]; 475d517e7eSBarry Smith fill[n] = n; 485d517e7eSBarry Smith while (nz--) { 495d517e7eSBarry Smith fm = n; 50*de6a44a3SBarry Smith idx = ic[*ajtmp++]; 515d517e7eSBarry Smith do { 525d517e7eSBarry Smith m = fm; 535d517e7eSBarry Smith fm = fill[m]; 545d517e7eSBarry Smith } while (fm < idx); 555d517e7eSBarry Smith fill[m] = idx; 565d517e7eSBarry Smith fill[idx] = fm; 575d517e7eSBarry Smith } 585d517e7eSBarry Smith row = fill[n]; 595d517e7eSBarry Smith while ( row < i ) { 60*de6a44a3SBarry Smith ajtmp = ajnew + idnew[row] + 1; 615d517e7eSBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 625d517e7eSBarry Smith nz = im[row] - nzbd; 635d517e7eSBarry Smith fm = row; 645d517e7eSBarry Smith while (nz-- > 0) { 65*de6a44a3SBarry Smith idx = *ajtmp++; 665d517e7eSBarry Smith nzbd++; 675d517e7eSBarry Smith if (idx == i) im[row] = nzbd; 685d517e7eSBarry Smith do { 695d517e7eSBarry Smith m = fm; 705d517e7eSBarry Smith fm = fill[m]; 715d517e7eSBarry Smith } while (fm < idx); 725d517e7eSBarry Smith if (fm != idx) { 735d517e7eSBarry Smith fill[m] = idx; 745d517e7eSBarry Smith fill[idx] = fm; 755d517e7eSBarry Smith fm = idx; 765d517e7eSBarry Smith nnz++; 775d517e7eSBarry Smith } 785d517e7eSBarry Smith } 795d517e7eSBarry Smith row = fill[row]; 805d517e7eSBarry Smith } 815d517e7eSBarry Smith /* copy new filled row into permanent storage */ 825d517e7eSBarry Smith ainew[i+1] = ainew[i] + nnz; 835d517e7eSBarry Smith if (ainew[i+1] > jmax+1) { 845d517e7eSBarry Smith /* allocate a longer ajnew */ 855d517e7eSBarry Smith int maxadd; 86*de6a44a3SBarry Smith maxadd = (int) ((f*(ai[n]+1)*(n-i+5))/n); 875d517e7eSBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 885d517e7eSBarry Smith jmax += maxadd; 895d517e7eSBarry Smith ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 90*de6a44a3SBarry Smith PetscMemcpy(ajtmp,ajnew,ainew[i]*sizeof(int)); 915d517e7eSBarry Smith PetscFree(ajnew); 925d517e7eSBarry Smith ajnew = ajtmp; 935d517e7eSBarry Smith realloc++; /* count how many times we realloc */ 945d517e7eSBarry Smith } 95*de6a44a3SBarry Smith ajtmp = ajnew + ainew[i]; 965d517e7eSBarry Smith fm = fill[n]; 975d517e7eSBarry Smith nzi = 0; 985d517e7eSBarry Smith im[i] = nnz; 995d517e7eSBarry Smith while (nnz--) { 1005d517e7eSBarry Smith if (fm < i) nzi++; 101*de6a44a3SBarry Smith *ajtmp++ = fm; 1025d517e7eSBarry Smith fm = fill[fm]; 1035d517e7eSBarry Smith } 1045d517e7eSBarry Smith idnew[i] = ainew[i] + nzi; 1055d517e7eSBarry Smith } 1065d517e7eSBarry Smith 1075d517e7eSBarry Smith PLogInfo((PetscObject)A, 108ec3a8b7bSBarry Smith "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 1095d517e7eSBarry Smith realloc,f,((double)ainew[n])/((double)ai[i])); 1105d517e7eSBarry Smith 1115d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 1125d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 1135d517e7eSBarry Smith 1145d517e7eSBarry Smith PetscFree(fill); 1155d517e7eSBarry Smith 1165d517e7eSBarry Smith /* put together the new matrix */ 117ec3a8b7bSBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr); 1185d517e7eSBarry Smith PLogObjectParent(*B,isicol); 1195d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 120ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*B)->data; 1215d517e7eSBarry Smith PetscFree(b->imax); 1225d517e7eSBarry Smith b->singlemalloc = 0; 123*de6a44a3SBarry Smith len = ainew[n]*sizeof(Scalar); 1245d517e7eSBarry Smith /* the next line frees the default space generated by the Create() */ 1255d517e7eSBarry Smith PetscFree(b->a); PetscFree(b->ilen); 126ec3a8b7bSBarry Smith b->a = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a); 1275d517e7eSBarry Smith b->j = ajnew; 1285d517e7eSBarry Smith b->i = ainew; 1295d517e7eSBarry Smith b->diag = idnew; 1305d517e7eSBarry Smith b->ilen = 0; 1315d517e7eSBarry Smith b->imax = 0; 1325d517e7eSBarry Smith b->row = isrow; 1335d517e7eSBarry Smith b->col = iscol; 134*de6a44a3SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar)); 1355d517e7eSBarry Smith CHKPTRQ(b->solve_work); 1365d517e7eSBarry Smith /* In b structure: Free imax, ilen, old a, old j. 1375d517e7eSBarry Smith Allocate idnew, solve_work, new a, new j */ 138*de6a44a3SBarry Smith PLogObjectMemory(*B,(ainew[n]-n)*(sizeof(int)+sizeof(Scalar))); 139*de6a44a3SBarry Smith b->maxnz = b->nz = ainew[n]; 1405d517e7eSBarry Smith 1415d517e7eSBarry Smith return 0; 1425d517e7eSBarry Smith } 1435d517e7eSBarry Smith 144ec3a8b7bSBarry Smith #include "pinclude/plapack.h" 145ec3a8b7bSBarry Smith 146ec3a8b7bSBarry Smith #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));} 147ec3a8b7bSBarry Smith 148ec3a8b7bSBarry Smith #define BlockCopy(v_in,v_out,idx_in,idx_out) \ 149ec3a8b7bSBarry Smith {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));} 150ec3a8b7bSBarry Smith 151ec3a8b7bSBarry Smith #define BlockInvert(vv,idx) \ 152ec3a8b7bSBarry Smith { int _i,info; Scalar *w = vv + bs2*idx; \ 153ec3a8b7bSBarry Smith LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \ 154ec3a8b7bSBarry Smith PetscMemzero(v_work,bs2*sizeof(Scalar)); \ 155ec3a8b7bSBarry Smith for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \ 156ec3a8b7bSBarry Smith LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\ 157ec3a8b7bSBarry Smith PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \ 158ec3a8b7bSBarry Smith } 159ec3a8b7bSBarry Smith 160*de6a44a3SBarry Smith #define BlockMult(v1,v2,v3) \ 161*de6a44a3SBarry Smith {Scalar _DOne=1.0, _DZero=0.0;\ 162*de6a44a3SBarry Smith BLgemm_("N","N",&bs,&bs,&bs,&_DOne,v1,&bs,v2,&bs,&_DZero,v3,&bs);} 163*de6a44a3SBarry Smith 164*de6a44a3SBarry Smith #define BlockMultSub(v1,v2,v3,idx2,idx3) \ 165*de6a44a3SBarry Smith {Scalar _DOne=1.0, _DMOne=-1.0;\ 166*de6a44a3SBarry Smith BLgemm_("N","N",&bs,&bs,&bs,&_DMOne,v1,&bs,v2+bs2*idx2,&bs,&_DOne,v3+bs2*idx3,&bs);} 167*de6a44a3SBarry Smith 168ec3a8b7bSBarry Smith /* ----------------------------------------------------------- */ 169ec3a8b7bSBarry Smith int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B) 1705d517e7eSBarry Smith { 1715d517e7eSBarry Smith Mat C = *B; 172ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data; 1735d517e7eSBarry Smith IS iscol = b->col, isrow = b->row, isicol; 174ec3a8b7bSBarry Smith int *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j; 175*de6a44a3SBarry Smith int *ajtmpold, *ajtmp, nz, row, bslog; 176ec3a8b7bSBarry Smith int *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots; 177*de6a44a3SBarry Smith register Scalar *pv,*v,*rtmp,*multiplier,*v_work,*pc; 1785d517e7eSBarry Smith register int *pj; 1795d517e7eSBarry Smith 1805d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 1815d517e7eSBarry Smith PLogObjectParent(*B,isicol); 1825d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 1835d517e7eSBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 184ec3a8b7bSBarry Smith rtmp = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 1855d517e7eSBarry Smith 186ec3a8b7bSBarry Smith /* generate work space needed by dense LU factorization */ 187*de6a44a3SBarry Smith v_work = (Scalar *) PetscMalloc((bs+2*bs2)*sizeof(Scalar));CHKPTRQ(v_work); 188*de6a44a3SBarry Smith multiplier = v_work + bs2; 189*de6a44a3SBarry Smith v_pivots = (int *) (multiplier + bs2); 190*de6a44a3SBarry Smith 191*de6a44a3SBarry Smith /* flops in while loop */ 192*de6a44a3SBarry Smith bslog = 4*bs*bs2 - bs; 1935d517e7eSBarry Smith 1945d517e7eSBarry Smith for ( i=0; i<n; i++ ) { 1955d517e7eSBarry Smith nz = ai[i+1] - ai[i]; 196*de6a44a3SBarry Smith ajtmp = aj + ai[i]; 197*de6a44a3SBarry Smith for ( j=0; j<nz; j++ ) BlockZero(rtmp,ajtmp[j]); 1985d517e7eSBarry Smith /* load in initial (unfactored row) */ 1995d517e7eSBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 200*de6a44a3SBarry Smith ajtmpold = a->j + a->i[r[i]]; 201*de6a44a3SBarry Smith v = a->a + bs2*a->i[r[i]]; 202*de6a44a3SBarry Smith for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ic[ajtmpold[j]]); 203*de6a44a3SBarry Smith row = *ajtmp++; 2045d517e7eSBarry Smith while (row < i) { 205*de6a44a3SBarry Smith pc = rtmp + bs2*row; 206*de6a44a3SBarry Smith /* if (*pc) { */ 207*de6a44a3SBarry Smith pv = b->a + bs2*diag_offset[row]; 208*de6a44a3SBarry Smith pj = b->j + diag_offset[row] + 1; 209*de6a44a3SBarry Smith BlockMult(pc,pv,multiplier); 210*de6a44a3SBarry Smith BlockCopy(multiplier,pc,0,0); 2115d517e7eSBarry Smith nz = ai[row+1] - diag_offset[row] - 1; 212ec3a8b7bSBarry Smith pv += bs2; 213*de6a44a3SBarry Smith for (j=0; j<nz; j++) BlockMultSub(multiplier,pv,rtmp,j,pj[j]); 214*de6a44a3SBarry Smith PLogFlops(bslog*nz); 215*de6a44a3SBarry Smith /* } */ 216*de6a44a3SBarry Smith row = *ajtmp++; 2175d517e7eSBarry Smith } 2185d517e7eSBarry Smith /* finished row so stick it into b->a */ 219*de6a44a3SBarry Smith pv = b->a + bs2*ai[i]; 220*de6a44a3SBarry Smith pj = b->j + ai[i]; 2215d517e7eSBarry Smith nz = ai[i+1] - ai[i]; 222*de6a44a3SBarry Smith for ( j=0; j<nz; j++ ) BlockCopy(rtmp,pv,pj[j],j); 2235d517e7eSBarry Smith diag = diag_offset[i] - ai[i]; 224ec3a8b7bSBarry Smith /* invert diagonal block */ 225ec3a8b7bSBarry Smith BlockInvert(pv,diag); 2265d517e7eSBarry Smith } 2275d517e7eSBarry Smith 228*de6a44a3SBarry Smith PetscFree(rtmp); PetscFree(v_work); 2295d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 2305d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 2315d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 2325d517e7eSBarry Smith C->factor = FACTOR_LU; 2335d517e7eSBarry Smith C->assembled = PETSC_TRUE; 234*de6a44a3SBarry Smith PLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */ 2355d517e7eSBarry Smith return 0; 2365d517e7eSBarry Smith } 2375d517e7eSBarry Smith /* ----------------------------------------------------------- */ 238ec3a8b7bSBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f) 2395d517e7eSBarry Smith { 240ec3a8b7bSBarry Smith Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data; 2415d517e7eSBarry Smith int ierr; 2425d517e7eSBarry Smith Mat C; 2435d517e7eSBarry Smith 244ec3a8b7bSBarry Smith ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr); 245ec3a8b7bSBarry Smith ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr); 2465d517e7eSBarry Smith 2475d517e7eSBarry Smith /* free all the data structures from mat */ 2485d517e7eSBarry Smith PetscFree(mat->a); 2495d517e7eSBarry Smith if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);} 2505d517e7eSBarry Smith if (mat->diag) PetscFree(mat->diag); 2515d517e7eSBarry Smith if (mat->ilen) PetscFree(mat->ilen); 2525d517e7eSBarry Smith if (mat->imax) PetscFree(mat->imax); 2535d517e7eSBarry Smith if (mat->solve_work) PetscFree(mat->solve_work); 2545d517e7eSBarry Smith PetscFree(mat); 2555d517e7eSBarry Smith 2565d517e7eSBarry Smith PetscMemcpy(A,C,sizeof(struct _Mat)); 2575d517e7eSBarry Smith PetscHeaderDestroy(C); 2585d517e7eSBarry Smith return 0; 2595d517e7eSBarry Smith } 2605d517e7eSBarry Smith /* ----------------------------------------------------------- */ 261ec3a8b7bSBarry Smith int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx) 2625d517e7eSBarry Smith { 263ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 2645d517e7eSBarry Smith IS iscol = a->col, isrow = a->row; 265*de6a44a3SBarry Smith int *r,*c, ierr, i, n = a->mbs, *vi, *ai = a->i, *aj = a->j; 266*de6a44a3SBarry Smith int nz,bs = a->bs,bs2 = bs*bs,idx,idt,idc, _One = 1; 267*de6a44a3SBarry Smith Scalar *xa,*ba,*aa = a->a, *sum, _DOne = 1.0, _DMOne = -1.0; 268*de6a44a3SBarry Smith Scalar _DZero = 0.0,sum1,sum2,sum3,sum4,sum5,x1,x2,x3,x4,x5; 269*de6a44a3SBarry Smith register Scalar *x, *b, *lsum, *tmp, *v; 2705d517e7eSBarry Smith 271ec3a8b7bSBarry Smith if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix"); 2725d517e7eSBarry Smith 273*de6a44a3SBarry Smith ierr = VecGetArray(bb,&ba); CHKERRQ(ierr); b = ba; 274*de6a44a3SBarry Smith ierr = VecGetArray(xx,&xa); CHKERRQ(ierr); x = xa; 2755d517e7eSBarry Smith tmp = a->solve_work; 2765d517e7eSBarry Smith 2775d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2785d517e7eSBarry Smith ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1); 2795d517e7eSBarry Smith 280*de6a44a3SBarry Smith switch (bs) { 281*de6a44a3SBarry Smith case 1: 2825d517e7eSBarry Smith /* forward solve the lower triangular */ 2835d517e7eSBarry Smith tmp[0] = b[*r++]; 2845d517e7eSBarry Smith for ( i=1; i<n; i++ ) { 285*de6a44a3SBarry Smith v = aa + ai[i]; 286*de6a44a3SBarry Smith vi = aj + ai[i]; 2875d517e7eSBarry Smith nz = a->diag[i] - ai[i]; 288*de6a44a3SBarry Smith sum1 = b[*r++]; 289*de6a44a3SBarry Smith while (nz--) { 290*de6a44a3SBarry Smith sum1 -= (*v++)*tmp[*vi++]; 2915d517e7eSBarry Smith } 292*de6a44a3SBarry Smith tmp[i] = sum1; 293*de6a44a3SBarry Smith } 2945d517e7eSBarry Smith /* backward solve the upper triangular */ 2955d517e7eSBarry Smith for ( i=n-1; i>=0; i-- ){ 296*de6a44a3SBarry Smith v = aa + a->diag[i] + 1; 297*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 2985d517e7eSBarry Smith nz = ai[i+1] - a->diag[i] - 1; 299*de6a44a3SBarry Smith sum1 = tmp[i]; 300*de6a44a3SBarry Smith while (nz--) { 301*de6a44a3SBarry Smith sum1 -= (*v++)*tmp[*vi++]; 302*de6a44a3SBarry Smith } 303*de6a44a3SBarry Smith x[*c--] = tmp[i] = aa[a->diag[i]]*sum1; 304*de6a44a3SBarry Smith } 305*de6a44a3SBarry Smith break; 306*de6a44a3SBarry Smith case 2: 307*de6a44a3SBarry Smith /* forward solve the lower triangular */ 308*de6a44a3SBarry Smith idx = 2*(*r++); 309*de6a44a3SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; 310*de6a44a3SBarry Smith for ( i=1; i<n; i++ ) { 311*de6a44a3SBarry Smith v = aa + 4*ai[i]; 312*de6a44a3SBarry Smith vi = aj + ai[i]; 313*de6a44a3SBarry Smith nz = a->diag[i] - ai[i]; 314*de6a44a3SBarry Smith idx = 2*(*r++); 315*de6a44a3SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; 316*de6a44a3SBarry Smith while (nz--) { 317*de6a44a3SBarry Smith idx = 2*(*vi++); 318*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 319*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 320*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 321*de6a44a3SBarry Smith v += 4; 322*de6a44a3SBarry Smith } 323*de6a44a3SBarry Smith idx = 2*i; 324*de6a44a3SBarry Smith tmp[idx] = sum1; tmp[1+idx] = sum2; 325*de6a44a3SBarry Smith } 326*de6a44a3SBarry Smith /* backward solve the upper triangular */ 327*de6a44a3SBarry Smith for ( i=n-1; i>=0; i-- ){ 328*de6a44a3SBarry Smith v = aa + 4*a->diag[i] + 4; 329*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 330*de6a44a3SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 331*de6a44a3SBarry Smith idt = 2*i; 332*de6a44a3SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; 333*de6a44a3SBarry Smith while (nz--) { 334*de6a44a3SBarry Smith idx = 2*(*vi++); 335*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 336*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[2]*x2; 337*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[3]*x2; 338*de6a44a3SBarry Smith v += 4; 339*de6a44a3SBarry Smith } 340*de6a44a3SBarry Smith idc = 2*(*c--); 341*de6a44a3SBarry Smith v = aa + 4*a->diag[i]; 342*de6a44a3SBarry Smith x[idc] = tmp[idt] = v[0]*sum1 + v[2]*sum2; 343*de6a44a3SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[3]*sum2; 344*de6a44a3SBarry Smith } 345*de6a44a3SBarry Smith break; 346*de6a44a3SBarry Smith case 3: 347*de6a44a3SBarry Smith /* forward solve the lower triangular */ 348*de6a44a3SBarry Smith idx = 3*(*r++); 349*de6a44a3SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; tmp[2] = b[2+idx]; 350*de6a44a3SBarry Smith for ( i=1; i<n; i++ ) { 351*de6a44a3SBarry Smith v = aa + 9*ai[i]; 352*de6a44a3SBarry Smith vi = aj + ai[i]; 353*de6a44a3SBarry Smith nz = a->diag[i] - ai[i]; 354*de6a44a3SBarry Smith idx = 3*(*r++); 355*de6a44a3SBarry Smith sum1 = b[idx]; sum2 = b[1+idx]; sum3 = b[2+idx]; 356*de6a44a3SBarry Smith while (nz--) { 357*de6a44a3SBarry Smith idx = 3*(*vi++); 358*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 359*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 360*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 361*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 362*de6a44a3SBarry Smith v += 9; 363*de6a44a3SBarry Smith } 364*de6a44a3SBarry Smith idx = 3*i; 365*de6a44a3SBarry Smith tmp[idx] = sum1; tmp[1+idx] = sum2; tmp[2+idx] = sum3; 366*de6a44a3SBarry Smith } 367*de6a44a3SBarry Smith /* backward solve the upper triangular */ 368*de6a44a3SBarry Smith for ( i=n-1; i>=0; i-- ){ 369*de6a44a3SBarry Smith v = aa + 9*a->diag[i] + 9; 370*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 371*de6a44a3SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 372*de6a44a3SBarry Smith idt = 3*i; 373*de6a44a3SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; sum3 = tmp[2+idt]; 374*de6a44a3SBarry Smith while (nz--) { 375*de6a44a3SBarry Smith idx = 3*(*vi++); 376*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; x3 = tmp[2+idx]; 377*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[3]*x2 + v[6]*x3; 378*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[4]*x2 + v[7]*x3; 379*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[5]*x2 + v[8]*x3; 380*de6a44a3SBarry Smith v += 9; 381*de6a44a3SBarry Smith } 382*de6a44a3SBarry Smith idc = 3*(*c--); 383*de6a44a3SBarry Smith v = aa + 9*a->diag[i]; 384*de6a44a3SBarry Smith x[idc] = tmp[idt] = v[0]*sum1 + v[3]*sum2 + v[6]*sum3; 385*de6a44a3SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1 + v[4]*sum2 + v[7]*sum3; 386*de6a44a3SBarry Smith x[2+idc] = tmp[2+idt] = v[2]*sum1 + v[5]*sum2 + v[8]*sum3; 387*de6a44a3SBarry Smith } 388*de6a44a3SBarry Smith break; 389*de6a44a3SBarry Smith case 4: 390*de6a44a3SBarry Smith /* forward solve the lower triangular */ 391*de6a44a3SBarry Smith idx = 4*(*r++); 392*de6a44a3SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; 393*de6a44a3SBarry Smith tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; 394*de6a44a3SBarry Smith for ( i=1; i<n; i++ ) { 395*de6a44a3SBarry Smith v = aa + 16*ai[i]; 396*de6a44a3SBarry Smith vi = aj + ai[i]; 397*de6a44a3SBarry Smith nz = a->diag[i] - ai[i]; 398*de6a44a3SBarry Smith idx = 4*(*r++); 399*de6a44a3SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 400*de6a44a3SBarry Smith while (nz--) { 401*de6a44a3SBarry Smith idx = 4*(*vi++); 402*de6a44a3SBarry Smith x1 = tmp[idx];x2 = tmp[1+idx];x3 = tmp[2+idx];x4 = tmp[3+idx]; 403*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 404*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 405*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 406*de6a44a3SBarry Smith sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 407*de6a44a3SBarry Smith v += 16; 408*de6a44a3SBarry Smith } 409*de6a44a3SBarry Smith idx = 4*i; 410*de6a44a3SBarry Smith tmp[idx] = sum1;tmp[1+idx] = sum2; 411*de6a44a3SBarry Smith tmp[2+idx] = sum3;tmp[3+idx] = sum4; 412*de6a44a3SBarry Smith } 413*de6a44a3SBarry Smith /* backward solve the upper triangular */ 414*de6a44a3SBarry Smith for ( i=n-1; i>=0; i-- ){ 415*de6a44a3SBarry Smith v = aa + 16*a->diag[i] + 16; 416*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 417*de6a44a3SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 418*de6a44a3SBarry Smith idt = 4*i; 419*de6a44a3SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; 420*de6a44a3SBarry Smith sum3 = tmp[2+idt];sum4 = tmp[3+idt]; 421*de6a44a3SBarry Smith while (nz--) { 422*de6a44a3SBarry Smith idx = 4*(*vi++); 423*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 424*de6a44a3SBarry Smith x3 = tmp[2+idx]; x4 = tmp[3+idx]; 425*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[4]*x2 + v[8]*x3 + v[12]*x4; 426*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[5]*x2 + v[9]*x3 + v[13]*x4; 427*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4; 428*de6a44a3SBarry Smith sum4 -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4; 429*de6a44a3SBarry Smith v += 16; 430*de6a44a3SBarry Smith } 431*de6a44a3SBarry Smith idc = 4*(*c--); 432*de6a44a3SBarry Smith v = aa + 16*a->diag[i]; 433*de6a44a3SBarry Smith x[idc] = tmp[idt] = v[0]*sum1+v[4]*sum2+v[8]*sum3+v[12]*sum4; 434*de6a44a3SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1+v[5]*sum2+v[9]*sum3+v[13]*sum4; 435*de6a44a3SBarry Smith x[2+idc] = tmp[2+idt] = v[2]*sum1+v[6]*sum2+v[10]*sum3+v[14]*sum4; 436*de6a44a3SBarry Smith x[3+idc] = tmp[3+idt] = v[3]*sum1+v[7]*sum2+v[11]*sum3+v[15]*sum4; 437*de6a44a3SBarry Smith } 438*de6a44a3SBarry Smith break; 439*de6a44a3SBarry Smith case 5: 440*de6a44a3SBarry Smith /* forward solve the lower triangular */ 441*de6a44a3SBarry Smith idx = 5*(*r++); 442*de6a44a3SBarry Smith tmp[0] = b[idx]; tmp[1] = b[1+idx]; 443*de6a44a3SBarry Smith tmp[2] = b[2+idx]; tmp[3] = b[3+idx]; tmp[4] = b[4+idx]; 444*de6a44a3SBarry Smith for ( i=1; i<n; i++ ) { 445*de6a44a3SBarry Smith v = aa + 25*ai[i]; 446*de6a44a3SBarry Smith vi = aj + ai[i]; 447*de6a44a3SBarry Smith nz = a->diag[i] - ai[i]; 448*de6a44a3SBarry Smith idx = 5*(*r++); 449*de6a44a3SBarry Smith sum1 = b[idx];sum2 = b[1+idx];sum3 = b[2+idx];sum4 = b[3+idx]; 450*de6a44a3SBarry Smith sum5 = b[4+idx]; 451*de6a44a3SBarry Smith while (nz--) { 452*de6a44a3SBarry Smith idx = 5*(*vi++); 453*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx];x3 = tmp[2+idx]; 454*de6a44a3SBarry Smith x4 = tmp[3+idx];x5 = tmp[4+idx]; 455*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 456*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 457*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 458*de6a44a3SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 459*de6a44a3SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 460*de6a44a3SBarry Smith v += 25; 461*de6a44a3SBarry Smith } 462*de6a44a3SBarry Smith idx = 5*i; 463*de6a44a3SBarry Smith tmp[idx] = sum1;tmp[1+idx] = sum2; 464*de6a44a3SBarry Smith tmp[2+idx] = sum3;tmp[3+idx] = sum4; tmp[4+idx] = sum5; 465*de6a44a3SBarry Smith } 466*de6a44a3SBarry Smith /* backward solve the upper triangular */ 467*de6a44a3SBarry Smith for ( i=n-1; i>=0; i-- ){ 468*de6a44a3SBarry Smith v = aa + 25*a->diag[i] + 25; 469*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 470*de6a44a3SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 471*de6a44a3SBarry Smith idt = 5*i; 472*de6a44a3SBarry Smith sum1 = tmp[idt]; sum2 = tmp[1+idt]; 473*de6a44a3SBarry Smith sum3 = tmp[2+idt];sum4 = tmp[3+idt]; sum5 = tmp[4+idt]; 474*de6a44a3SBarry Smith while (nz--) { 475*de6a44a3SBarry Smith idx = 5*(*vi++); 476*de6a44a3SBarry Smith x1 = tmp[idx]; x2 = tmp[1+idx]; 477*de6a44a3SBarry Smith x3 = tmp[2+idx]; x4 = tmp[3+idx]; x5 = tmp[4+idx]; 478*de6a44a3SBarry Smith sum1 -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5; 479*de6a44a3SBarry Smith sum2 -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5; 480*de6a44a3SBarry Smith sum3 -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5; 481*de6a44a3SBarry Smith sum4 -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5; 482*de6a44a3SBarry Smith sum5 -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5; 483*de6a44a3SBarry Smith v += 25; 484*de6a44a3SBarry Smith } 485*de6a44a3SBarry Smith idc = 5*(*c--); 486*de6a44a3SBarry Smith v = aa + 25*a->diag[i]; 487*de6a44a3SBarry Smith x[idc] = tmp[idt] = v[0]*sum1+v[5]*sum2+v[10]*sum3+ 488*de6a44a3SBarry Smith v[15]*sum4+v[20]*sum5; 489*de6a44a3SBarry Smith x[1+idc] = tmp[1+idt] = v[1]*sum1+v[6]*sum2+v[11]*sum3+ 490*de6a44a3SBarry Smith v[16]*sum4+v[21]*sum5; 491*de6a44a3SBarry Smith x[2+idc] = tmp[2+idt] = v[2]*sum1+v[7]*sum2+v[12]*sum3+ 492*de6a44a3SBarry Smith v[17]*sum4+v[22]*sum5; 493*de6a44a3SBarry Smith x[3+idc] = tmp[3+idt] = v[3]*sum1+v[8]*sum2+v[13]*sum3+ 494*de6a44a3SBarry Smith v[18]*sum4+v[23]*sum5; 495*de6a44a3SBarry Smith x[4+idc] = tmp[4+idt] = v[4]*sum1+v[9]*sum2+v[14]*sum3+ 496*de6a44a3SBarry Smith v[19]*sum4+v[24]*sum5; 497*de6a44a3SBarry Smith } 498*de6a44a3SBarry Smith break; 499*de6a44a3SBarry Smith default: { 500*de6a44a3SBarry Smith /* forward solve the lower triangular */ 501*de6a44a3SBarry Smith PetscMemcpy(tmp,b + bs*(*r++), bs*sizeof(Scalar)); 502*de6a44a3SBarry Smith for ( i=1; i<n; i++ ) { 503*de6a44a3SBarry Smith v = aa + bs2*ai[i]; 504*de6a44a3SBarry Smith vi = aj + ai[i]; 505*de6a44a3SBarry Smith nz = a->diag[i] - ai[i]; 506*de6a44a3SBarry Smith sum = tmp + bs*i; 507*de6a44a3SBarry Smith PetscMemcpy(sum,b+bs*(*r++),bs*sizeof(Scalar)); 508*de6a44a3SBarry Smith while (nz--) { 509*de6a44a3SBarry Smith LAgemv_("N",&bs,&bs,&_DMOne,v,&bs,tmp+bs*(*vi++),&_One,&_DOne,sum,&_One); 510*de6a44a3SBarry Smith v += bs2; 511*de6a44a3SBarry Smith } 512*de6a44a3SBarry Smith } 513*de6a44a3SBarry Smith /* backward solve the upper triangular */ 514*de6a44a3SBarry Smith lsum = a->solve_work + a->n; 515*de6a44a3SBarry Smith for ( i=n-1; i>=0; i-- ){ 516*de6a44a3SBarry Smith v = aa + bs2*(a->diag[i] + 1); 517*de6a44a3SBarry Smith vi = aj + a->diag[i] + 1; 518*de6a44a3SBarry Smith nz = ai[i+1] - a->diag[i] - 1; 519*de6a44a3SBarry Smith PetscMemcpy(lsum,tmp+i*bs,bs*sizeof(Scalar)); 520*de6a44a3SBarry Smith while (nz--) { 521*de6a44a3SBarry Smith LAgemv_("N",&bs,&bs,&_DMOne,v,&bs,tmp+bs*(*vi++),&_One,&_DOne,lsum,&_One); 522*de6a44a3SBarry Smith v += bs2; 523*de6a44a3SBarry Smith } 524*de6a44a3SBarry Smith LAgemv_("N",&bs,&bs,&_DOne,aa+bs2*a->diag[i],&bs,lsum,&_One,&_DZero, 525*de6a44a3SBarry Smith tmp+i*bs,&_One); 526*de6a44a3SBarry Smith PetscMemcpy(x + bs*(*c--),tmp+i*bs,bs*sizeof(Scalar)); 527*de6a44a3SBarry Smith } 528*de6a44a3SBarry Smith } 5295d517e7eSBarry Smith } 5305d517e7eSBarry Smith 5315d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 5325d517e7eSBarry Smith ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 533*de6a44a3SBarry Smith PLogFlops(2*bs2*a->nz - a->n); 5345d517e7eSBarry Smith return 0; 5355d517e7eSBarry Smith } 5365d517e7eSBarry Smith 5375d517e7eSBarry Smith /* ----------------------------------------------------------------*/ 538ec3a8b7bSBarry Smith /* 539ec3a8b7bSBarry Smith This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 540ec3a8b7bSBarry Smith except that the data structure of Mat_SeqAIJ is slightly different. 541ec3a8b7bSBarry Smith Not a good example of code reuse. 542ec3a8b7bSBarry Smith */ 543ec3a8b7bSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels, 544ec3a8b7bSBarry Smith Mat *fact) 5455d517e7eSBarry Smith { 546ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 5475d517e7eSBarry Smith IS isicol; 548ec3a8b7bSBarry Smith int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 5495d517e7eSBarry Smith int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 5505d517e7eSBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0; 551*de6a44a3SBarry Smith int incrlev,nnz,i,bs = a->bs; 5525d517e7eSBarry Smith 553ec3a8b7bSBarry Smith if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square"); 554ec3a8b7bSBarry Smith if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation"); 555ec3a8b7bSBarry Smith if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation"); 5565d517e7eSBarry Smith 5575d517e7eSBarry Smith /* special case that simply copies fill pattern */ 5585d517e7eSBarry Smith if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) { 559ec3a8b7bSBarry Smith ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr); 5605d517e7eSBarry Smith (*fact)->factor = FACTOR_LU; 561ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*fact)->data; 5625d517e7eSBarry Smith if (!b->diag) { 563ec3a8b7bSBarry Smith ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 5645d517e7eSBarry Smith } 5655d517e7eSBarry Smith b->row = isrow; 5665d517e7eSBarry Smith b->col = iscol; 567*de6a44a3SBarry Smith b->solve_work = (Scalar *) PetscMalloc((b->m+1+b->bs)*sizeof(Scalar));CHKPTRQ(b->solve_work); 5685d517e7eSBarry Smith return 0; 5695d517e7eSBarry Smith } 5705d517e7eSBarry Smith 5715d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 5725d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 5735d517e7eSBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 5745d517e7eSBarry Smith 5755d517e7eSBarry Smith /* get new row pointers */ 5765d517e7eSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 577*de6a44a3SBarry Smith ainew[0] = 0; 5785d517e7eSBarry Smith /* don't know how many column pointers are needed so estimate */ 579*de6a44a3SBarry Smith jmax = (int) (f*ai[n] + 1); 5805d517e7eSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 5815d517e7eSBarry Smith /* ajfill is level of fill for each fill entry */ 5825d517e7eSBarry Smith ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 5835d517e7eSBarry Smith /* fill is a linked list of nonzeros in active row */ 5845d517e7eSBarry Smith fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 5855d517e7eSBarry Smith /* im is level for each filled value */ 5865d517e7eSBarry Smith im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 5875d517e7eSBarry Smith /* dloc is location of diagonal in factor */ 5885d517e7eSBarry Smith dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 5895d517e7eSBarry Smith dloc[0] = 0; 5905d517e7eSBarry Smith for ( prow=0; prow<n; prow++ ) { 5915d517e7eSBarry Smith /* first copy previous fill into linked list */ 5925d517e7eSBarry Smith nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 593*de6a44a3SBarry Smith xi = aj + ai[r[prow]]; 5945d517e7eSBarry Smith fill[n] = n; 5955d517e7eSBarry Smith while (nz--) { 5965d517e7eSBarry Smith fm = n; 597*de6a44a3SBarry Smith idx = ic[*xi++]; 5985d517e7eSBarry Smith do { 5995d517e7eSBarry Smith m = fm; 6005d517e7eSBarry Smith fm = fill[m]; 6015d517e7eSBarry Smith } while (fm < idx); 6025d517e7eSBarry Smith fill[m] = idx; 6035d517e7eSBarry Smith fill[idx] = fm; 6045d517e7eSBarry Smith im[idx] = 0; 6055d517e7eSBarry Smith } 6065d517e7eSBarry Smith nzi = 0; 6075d517e7eSBarry Smith row = fill[n]; 6085d517e7eSBarry Smith while ( row < prow ) { 6095d517e7eSBarry Smith incrlev = im[row] + 1; 6105d517e7eSBarry Smith nz = dloc[row]; 611*de6a44a3SBarry Smith xi = ajnew + ainew[row] + nz; 612*de6a44a3SBarry Smith flev = ajfill + ainew[row] + nz + 1; 6135d517e7eSBarry Smith nnz = ainew[row+1] - ainew[row] - nz - 1; 614*de6a44a3SBarry Smith if (*xi++ != row) { 615ec3a8b7bSBarry Smith SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot"); 6165d517e7eSBarry Smith } 6175d517e7eSBarry Smith fm = row; 6185d517e7eSBarry Smith while (nnz-- > 0) { 619*de6a44a3SBarry Smith idx = *xi++; 6205d517e7eSBarry Smith if (*flev + incrlev > levels) { 6215d517e7eSBarry Smith flev++; 6225d517e7eSBarry Smith continue; 6235d517e7eSBarry Smith } 6245d517e7eSBarry Smith do { 6255d517e7eSBarry Smith m = fm; 6265d517e7eSBarry Smith fm = fill[m]; 6275d517e7eSBarry Smith } while (fm < idx); 6285d517e7eSBarry Smith if (fm != idx) { 6295d517e7eSBarry Smith im[idx] = *flev + incrlev; 6305d517e7eSBarry Smith fill[m] = idx; 6315d517e7eSBarry Smith fill[idx] = fm; 6325d517e7eSBarry Smith fm = idx; 6335d517e7eSBarry Smith nzf++; 6345d517e7eSBarry Smith } 6355d517e7eSBarry Smith else { 6365d517e7eSBarry Smith if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 6375d517e7eSBarry Smith } 6385d517e7eSBarry Smith flev++; 6395d517e7eSBarry Smith } 6405d517e7eSBarry Smith row = fill[row]; 6415d517e7eSBarry Smith nzi++; 6425d517e7eSBarry Smith } 6435d517e7eSBarry Smith /* copy new filled row into permanent storage */ 6445d517e7eSBarry Smith ainew[prow+1] = ainew[prow] + nzf; 645*de6a44a3SBarry Smith if (ainew[prow+1] > jmax) { 6465d517e7eSBarry Smith /* allocate a longer ajnew */ 6475d517e7eSBarry Smith int maxadd; 648*de6a44a3SBarry Smith maxadd = (int) (((f*ai[n]+1)*(n-prow+5))/n); 6495d517e7eSBarry Smith if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 6505d517e7eSBarry Smith jmax += maxadd; 6515d517e7eSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 652*de6a44a3SBarry Smith PetscMemcpy(xi,ajnew,ainew[prow]*sizeof(int)); 6535d517e7eSBarry Smith PetscFree(ajnew); 6545d517e7eSBarry Smith ajnew = xi; 6555d517e7eSBarry Smith /* allocate a longer ajfill */ 6565d517e7eSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 657*de6a44a3SBarry Smith PetscMemcpy(xi,ajfill,ainew[prow]*sizeof(int)); 6585d517e7eSBarry Smith PetscFree(ajfill); 6595d517e7eSBarry Smith ajfill = xi; 6605d517e7eSBarry Smith realloc++; 6615d517e7eSBarry Smith } 662*de6a44a3SBarry Smith xi = ajnew + ainew[prow]; 663*de6a44a3SBarry Smith flev = ajfill + ainew[prow]; 6645d517e7eSBarry Smith dloc[prow] = nzi; 6655d517e7eSBarry Smith fm = fill[n]; 6665d517e7eSBarry Smith while (nzf--) { 667*de6a44a3SBarry Smith *xi++ = fm; 6685d517e7eSBarry Smith *flev++ = im[fm]; 6695d517e7eSBarry Smith fm = fill[fm]; 6705d517e7eSBarry Smith } 6715d517e7eSBarry Smith } 6725d517e7eSBarry Smith PetscFree(ajfill); 6735d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 6745d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 6755d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 6765d517e7eSBarry Smith PetscFree(fill); PetscFree(im); 6775d517e7eSBarry Smith 6785d517e7eSBarry Smith PLogInfo((PetscObject)A, 679ec3a8b7bSBarry Smith "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n", 6805d517e7eSBarry Smith realloc,f,((double)ainew[n])/((double)ai[prow])); 6815d517e7eSBarry Smith 6825d517e7eSBarry Smith /* put together the new matrix */ 683ec3a8b7bSBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 684ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*fact)->data; 6855d517e7eSBarry Smith PetscFree(b->imax); 6865d517e7eSBarry Smith b->singlemalloc = 0; 687*de6a44a3SBarry Smith len = bs*bs*ainew[n]*sizeof(Scalar); 6885d517e7eSBarry Smith /* the next line frees the default space generated by the Create() */ 6895d517e7eSBarry Smith PetscFree(b->a); PetscFree(b->ilen); 6905d517e7eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 6915d517e7eSBarry Smith b->j = ajnew; 6925d517e7eSBarry Smith b->i = ainew; 6935d517e7eSBarry Smith for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 6945d517e7eSBarry Smith b->diag = dloc; 6955d517e7eSBarry Smith b->ilen = 0; 6965d517e7eSBarry Smith b->imax = 0; 6975d517e7eSBarry Smith b->row = isrow; 6985d517e7eSBarry Smith b->col = iscol; 699*de6a44a3SBarry Smith b->solve_work = (Scalar *) PetscMalloc( (bs*n+bs)*sizeof(Scalar)); 7005d517e7eSBarry Smith CHKPTRQ(b->solve_work); 7015d517e7eSBarry Smith /* In b structure: Free imax, ilen, old a, old j. 7025d517e7eSBarry Smith Allocate dloc, solve_work, new a, new j */ 703*de6a44a3SBarry Smith PLogObjectMemory(*fact,(ainew[n]-n)*(sizeof(int))+bs*bs*ainew[n]*sizeof(Scalar)); 704*de6a44a3SBarry Smith b->maxnz = b->nz = ainew[n]; 7055d517e7eSBarry Smith (*fact)->factor = FACTOR_LU; 7065d517e7eSBarry Smith return 0; 7075d517e7eSBarry Smith } 7085d517e7eSBarry Smith 7095d517e7eSBarry Smith 7105d517e7eSBarry Smith 7115d517e7eSBarry Smith 712