15d517e7eSBarry Smith #ifndef lint 2*ec3a8b7bSBarry Smith static char vcid[] = "$Id: baijfact.c,v 1.1 1996/02/13 22:33:11 bsmith Exp bsmith $"; 35d517e7eSBarry Smith #endif 45d517e7eSBarry Smith /* 5*ec3a8b7bSBarry Smith Factorization code for BAIJ format. 65d517e7eSBarry Smith */ 75d517e7eSBarry Smith 8*ec3a8b7bSBarry Smith #include "baij.h" 9*ec3a8b7bSBarry Smith 10*ec3a8b7bSBarry Smith /* 11*ec3a8b7bSBarry Smith The symbolic factorization code is identical to that for AIJ format, 12*ec3a8b7bSBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 13*ec3a8b7bSBarry Smith NOT good code reuse. 14*ec3a8b7bSBarry Smith */ 15*ec3a8b7bSBarry Smith int MatLUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,Mat *B) 165d517e7eSBarry Smith { 17*ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 185d517e7eSBarry Smith IS isicol; 19*ec3a8b7bSBarry Smith int *r,*ic, ierr, i, n = a->mbs, *ai = a->i, *aj = a->j; 205d517e7eSBarry Smith int *ainew,*ajnew, jmax,*fill, *ajtmp, nz,shift = a->indexshift; 215d517e7eSBarry Smith int *idnew, idx, row,m,fm, nnz, nzi,len, realloc = 0,nzbd,*im; 22*ec3a8b7bSBarry Smith int bs = a->bs; 235d517e7eSBarry Smith 24*ec3a8b7bSBarry Smith if (a->m != a->n) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must be square"); 25*ec3a8b7bSBarry Smith if (!isrow) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have row permutation"); 26*ec3a8b7bSBarry Smith if (!iscol) SETERRQ(1,"MatLUFactorSymbolic_SeqBAIJ:Must have col. permutation"); 275d517e7eSBarry Smith 285d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 295d517e7eSBarry Smith ISGetIndices(isrow,&r); ISGetIndices(isicol,&ic); 305d517e7eSBarry Smith 315d517e7eSBarry Smith /* get new row pointers */ 325d517e7eSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 335d517e7eSBarry Smith ainew[0] = -shift; 345d517e7eSBarry Smith /* don't know how many column pointers are needed so estimate */ 355d517e7eSBarry Smith jmax = (int) (f*ai[n]+(!shift)); 365d517e7eSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 375d517e7eSBarry Smith /* fill is a linked list of nonzeros in active row */ 385d517e7eSBarry Smith fill = (int *) PetscMalloc( (2*n+1)*sizeof(int)); CHKPTRQ(fill); 395d517e7eSBarry Smith im = fill + n + 1; 405d517e7eSBarry Smith /* idnew is location of diagonal in factor */ 415d517e7eSBarry Smith idnew = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(idnew); 425d517e7eSBarry Smith idnew[0] = -shift; 435d517e7eSBarry Smith 445d517e7eSBarry Smith for ( i=0; i<n; i++ ) { 455d517e7eSBarry Smith /* first copy previous fill into linked list */ 465d517e7eSBarry Smith nnz = nz = ai[r[i]+1] - ai[r[i]]; 475d517e7eSBarry Smith ajtmp = aj + ai[r[i]] + shift; 485d517e7eSBarry Smith fill[n] = n; 495d517e7eSBarry Smith while (nz--) { 505d517e7eSBarry Smith fm = n; 515d517e7eSBarry Smith idx = ic[*ajtmp++ + shift]; 525d517e7eSBarry Smith do { 535d517e7eSBarry Smith m = fm; 545d517e7eSBarry Smith fm = fill[m]; 555d517e7eSBarry Smith } while (fm < idx); 565d517e7eSBarry Smith fill[m] = idx; 575d517e7eSBarry Smith fill[idx] = fm; 585d517e7eSBarry Smith } 595d517e7eSBarry Smith row = fill[n]; 605d517e7eSBarry Smith while ( row < i ) { 615d517e7eSBarry Smith ajtmp = ajnew + idnew[row] + (!shift); 625d517e7eSBarry Smith nzbd = 1 + idnew[row] - ainew[row]; 635d517e7eSBarry Smith nz = im[row] - nzbd; 645d517e7eSBarry Smith fm = row; 655d517e7eSBarry Smith while (nz-- > 0) { 665d517e7eSBarry Smith idx = *ajtmp++ + shift; 675d517e7eSBarry Smith nzbd++; 685d517e7eSBarry Smith if (idx == i) im[row] = nzbd; 695d517e7eSBarry Smith do { 705d517e7eSBarry Smith m = fm; 715d517e7eSBarry Smith fm = fill[m]; 725d517e7eSBarry Smith } while (fm < idx); 735d517e7eSBarry Smith if (fm != idx) { 745d517e7eSBarry Smith fill[m] = idx; 755d517e7eSBarry Smith fill[idx] = fm; 765d517e7eSBarry Smith fm = idx; 775d517e7eSBarry Smith nnz++; 785d517e7eSBarry Smith } 795d517e7eSBarry Smith } 805d517e7eSBarry Smith row = fill[row]; 815d517e7eSBarry Smith } 825d517e7eSBarry Smith /* copy new filled row into permanent storage */ 835d517e7eSBarry Smith ainew[i+1] = ainew[i] + nnz; 845d517e7eSBarry Smith if (ainew[i+1] > jmax+1) { 855d517e7eSBarry Smith /* allocate a longer ajnew */ 865d517e7eSBarry Smith int maxadd; 875d517e7eSBarry Smith maxadd = (int) ((f*(ai[n]+(!shift))*(n-i+5))/n); 885d517e7eSBarry Smith if (maxadd < nnz) maxadd = (n-i)*(nnz+1); 895d517e7eSBarry Smith jmax += maxadd; 905d517e7eSBarry Smith ajtmp = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(ajtmp); 915d517e7eSBarry Smith PetscMemcpy(ajtmp,ajnew,(ainew[i]+shift)*sizeof(int)); 925d517e7eSBarry Smith PetscFree(ajnew); 935d517e7eSBarry Smith ajnew = ajtmp; 945d517e7eSBarry Smith realloc++; /* count how many times we realloc */ 955d517e7eSBarry Smith } 965d517e7eSBarry Smith ajtmp = ajnew + ainew[i] + shift; 975d517e7eSBarry Smith fm = fill[n]; 985d517e7eSBarry Smith nzi = 0; 995d517e7eSBarry Smith im[i] = nnz; 1005d517e7eSBarry Smith while (nnz--) { 1015d517e7eSBarry Smith if (fm < i) nzi++; 1025d517e7eSBarry Smith *ajtmp++ = fm - shift; 1035d517e7eSBarry Smith fm = fill[fm]; 1045d517e7eSBarry Smith } 1055d517e7eSBarry Smith idnew[i] = ainew[i] + nzi; 1065d517e7eSBarry Smith } 1075d517e7eSBarry Smith 1085d517e7eSBarry Smith PLogInfo((PetscObject)A, 109*ec3a8b7bSBarry Smith "Info:MatLUFactorSymbolic_SeqBAIJ:Reallocs %d Fill ratio:given %g needed %g\n", 1105d517e7eSBarry Smith realloc,f,((double)ainew[n])/((double)ai[i])); 1115d517e7eSBarry Smith 1125d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 1135d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 1145d517e7eSBarry Smith 1155d517e7eSBarry Smith PetscFree(fill); 1165d517e7eSBarry Smith 1175d517e7eSBarry Smith /* put together the new matrix */ 118*ec3a8b7bSBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,B); CHKERRQ(ierr); 1195d517e7eSBarry Smith PLogObjectParent(*B,isicol); 1205d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 121*ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*B)->data; 1225d517e7eSBarry Smith PetscFree(b->imax); 1235d517e7eSBarry Smith b->singlemalloc = 0; 1245d517e7eSBarry Smith len = (ainew[n] + shift)*sizeof(Scalar); 1255d517e7eSBarry Smith /* the next line frees the default space generated by the Create() */ 1265d517e7eSBarry Smith PetscFree(b->a); PetscFree(b->ilen); 127*ec3a8b7bSBarry Smith b->a = (Scalar *) PetscMalloc( len*bs*bs ); CHKPTRQ(b->a); 1285d517e7eSBarry Smith b->j = ajnew; 1295d517e7eSBarry Smith b->i = ainew; 1305d517e7eSBarry Smith b->diag = idnew; 1315d517e7eSBarry Smith b->ilen = 0; 1325d517e7eSBarry Smith b->imax = 0; 1335d517e7eSBarry Smith b->row = isrow; 1345d517e7eSBarry Smith b->col = iscol; 1355d517e7eSBarry Smith b->solve_work = (Scalar *) PetscMalloc( n*sizeof(Scalar)); 1365d517e7eSBarry Smith CHKPTRQ(b->solve_work); 1375d517e7eSBarry Smith /* In b structure: Free imax, ilen, old a, old j. 1385d517e7eSBarry Smith Allocate idnew, solve_work, new a, new j */ 1395d517e7eSBarry Smith PLogObjectMemory(*B,(ainew[n]+shift-n)*(sizeof(int)+sizeof(Scalar))); 1405d517e7eSBarry Smith b->maxnz = b->nz = ainew[n] + shift; 1415d517e7eSBarry Smith 1425d517e7eSBarry Smith return 0; 1435d517e7eSBarry Smith } 1445d517e7eSBarry Smith 145*ec3a8b7bSBarry Smith #include "pinclude/plapack.h" 146*ec3a8b7bSBarry Smith 147*ec3a8b7bSBarry Smith #define BlockZero(v,idx) {PetscMemzero(v+bs2*(idx),bs2*sizeof(Scalar));} 148*ec3a8b7bSBarry Smith 149*ec3a8b7bSBarry Smith #define BlockCopy(v_in,v_out,idx_in,idx_out) \ 150*ec3a8b7bSBarry Smith {PetscMemcpy(v_out + bs2*(idx_out),v_in + bs2*(idx_in),bs2*sizeof(Scalar));} 151*ec3a8b7bSBarry Smith 152*ec3a8b7bSBarry Smith #define BlockInvert(vv,idx) \ 153*ec3a8b7bSBarry Smith { int _i,info; Scalar *w = vv + bs2*idx; \ 154*ec3a8b7bSBarry Smith printf("vv idx %g %d \n",*vv,idx); \ 155*ec3a8b7bSBarry Smith LAgetrf_(&bs,&bs,w,&bs,v_pivots,&info); CHKERRQ(info); \ 156*ec3a8b7bSBarry Smith printf("bs %d w %g\n",bs,*w);\ 157*ec3a8b7bSBarry Smith PetscMemzero(v_work,bs2*sizeof(Scalar)); \ 158*ec3a8b7bSBarry Smith printf("vwork %g\n",*v_work);\ 159*ec3a8b7bSBarry Smith for ( _i=0; _i<bs; _i++ ) { v_work[_i + bs*_i] = 1.0; } \ 160*ec3a8b7bSBarry Smith printf("vwork %g\n",*v_work);\ 161*ec3a8b7bSBarry Smith LAgetrs_("N",&bs,&bs,w,&bs,v_pivots,v_work,&bs, &info);CHKERRQ(info);\ 162*ec3a8b7bSBarry Smith printf("w %g vwork %g\n",*w,*v_work);\ 163*ec3a8b7bSBarry Smith PetscMemcpy(w,v_work,bs2*sizeof(Scalar)); \ 164*ec3a8b7bSBarry Smith printf("w %g vwork %g\n",*w,*v_work);\ 165*ec3a8b7bSBarry Smith } 166*ec3a8b7bSBarry Smith 167*ec3a8b7bSBarry Smith #define BlockMult(v1,v2,v3 168*ec3a8b7bSBarry Smith /* ----------------------------------------------------------- */ 169*ec3a8b7bSBarry Smith int MatLUFactorNumeric_SeqBAIJ(Mat A,Mat *B) 1705d517e7eSBarry Smith { 1715d517e7eSBarry Smith Mat C = *B; 172*ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b = (Mat_SeqBAIJ *)C->data; 1735d517e7eSBarry Smith IS iscol = b->col, isrow = b->row, isicol; 174*ec3a8b7bSBarry Smith int *r,*ic, ierr, i, j, n = a->mbs, *ai = b->i, *aj = b->j; 1755d517e7eSBarry Smith int *ajtmpold, *ajtmp, nz, row, *ics, shift = a->indexshift; 176*ec3a8b7bSBarry Smith int *diag_offset = b->diag,diag,bs = a->bs,bs2 = bs*bs, *v_pivots; 177*ec3a8b7bSBarry Smith Scalar *rtmp,*v, *pc, multiplier,*v_work; 1785d517e7eSBarry Smith /* These declarations are for optimizations. They reduce the number of 1795d517e7eSBarry Smith memory references that are made by locally storing information; the 1805d517e7eSBarry Smith word "register" used here with pointers can be viewed as "private" or 1815d517e7eSBarry Smith "known only to me" 1825d517e7eSBarry Smith */ 183*ec3a8b7bSBarry Smith register Scalar *pv, *rtmps; 1845d517e7eSBarry Smith register int *pj; 1855d517e7eSBarry Smith 1865d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 1875d517e7eSBarry Smith PLogObjectParent(*B,isicol); 1885d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 1895d517e7eSBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 190*ec3a8b7bSBarry Smith rtmp = (Scalar *) PetscMalloc(bs2*(n+1)*sizeof(Scalar));CHKPTRQ(rtmp); 1915d517e7eSBarry Smith rtmps = rtmp + shift; ics = ic + shift; 1925d517e7eSBarry Smith 193*ec3a8b7bSBarry Smith /* generate work space needed by dense LU factorization */ 194*ec3a8b7bSBarry Smith v_work = (Scalar *) PetscMalloc(bs2*sizeof(Scalar)); CHKPTRQ(v_work); 195*ec3a8b7bSBarry Smith v_pivots = (int *) PetscMalloc(bs*sizeof(int)); CHKPTRQ(v_pivots); 1965d517e7eSBarry Smith 1975d517e7eSBarry Smith for ( i=0; i<n; i++ ) { 1985d517e7eSBarry Smith nz = ai[i+1] - ai[i]; 1995d517e7eSBarry Smith ajtmp = aj + ai[i] + shift; 200*ec3a8b7bSBarry Smith for ( j=0; j<nz; j++ ) BlockZero(rtmps,ajtmp[j]); 201*ec3a8b7bSBarry Smith malloc_verify(); 2025d517e7eSBarry Smith /* load in initial (unfactored row) */ 2035d517e7eSBarry Smith nz = a->i[r[i]+1] - a->i[r[i]]; 2045d517e7eSBarry Smith ajtmpold = a->j + a->i[r[i]] + shift; 205*ec3a8b7bSBarry Smith v = a->a + bs2*a->i[r[i]] + shift; 206*ec3a8b7bSBarry Smith for ( j=0; j<nz; j++ ) BlockCopy(v,rtmp,j,ics[ajtmpold[j]]); 207*ec3a8b7bSBarry Smith malloc_verify(); 2085d517e7eSBarry Smith 2095d517e7eSBarry Smith row = *ajtmp++ + shift; 2105d517e7eSBarry Smith while (row < i) { 2115d517e7eSBarry Smith pc = rtmp + row; 2125d517e7eSBarry Smith if (*pc != 0.0) { 213*ec3a8b7bSBarry Smith pv = b->a + bs2*diag_offset[row] + shift; 2145d517e7eSBarry Smith pj = b->j + diag_offset[row] + (!shift); 215*ec3a8b7bSBarry Smith multiplier = *pc * (*pv); 2165d517e7eSBarry Smith *pc = multiplier; 2175d517e7eSBarry Smith nz = ai[row+1] - diag_offset[row] - 1; 2185d517e7eSBarry Smith for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j]; 219*ec3a8b7bSBarry Smith pv += bs2; 2205d517e7eSBarry Smith PLogFlops(2*nz); 2215d517e7eSBarry Smith } 2225d517e7eSBarry Smith row = *ajtmp++ + shift; 2235d517e7eSBarry Smith } 2245d517e7eSBarry Smith /* finished row so stick it into b->a */ 225*ec3a8b7bSBarry Smith pv = b->a + bs2*ai[i] + shift; 2265d517e7eSBarry Smith pj = b->j + ai[i] + shift; 2275d517e7eSBarry Smith nz = ai[i+1] - ai[i]; 228*ec3a8b7bSBarry Smith for ( j=0; j<nz; j++ ) BlockCopy(rtmps,pv,pj[j],j); 229*ec3a8b7bSBarry Smith malloc_verify(); 2305d517e7eSBarry Smith diag = diag_offset[i] - ai[i]; 231*ec3a8b7bSBarry Smith /* invert diagonal block */ 232*ec3a8b7bSBarry Smith BlockInvert(pv,diag); 233*ec3a8b7bSBarry Smith malloc_verify(); 2345d517e7eSBarry Smith } 2355d517e7eSBarry Smith 2365d517e7eSBarry Smith PetscFree(rtmp); 2375d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 2385d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 2395d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 2405d517e7eSBarry Smith C->factor = FACTOR_LU; 2415d517e7eSBarry Smith C->assembled = PETSC_TRUE; 2425d517e7eSBarry Smith PLogFlops(b->n); 2435d517e7eSBarry Smith return 0; 2445d517e7eSBarry Smith } 2455d517e7eSBarry Smith /* ----------------------------------------------------------- */ 246*ec3a8b7bSBarry Smith int MatLUFactor_SeqBAIJ(Mat A,IS row,IS col,double f) 2475d517e7eSBarry Smith { 248*ec3a8b7bSBarry Smith Mat_SeqBAIJ *mat = (Mat_SeqBAIJ *) A->data; 2495d517e7eSBarry Smith int ierr; 2505d517e7eSBarry Smith Mat C; 2515d517e7eSBarry Smith 252*ec3a8b7bSBarry Smith ierr = MatLUFactorSymbolic_SeqBAIJ(A,row,col,f,&C); CHKERRQ(ierr); 253*ec3a8b7bSBarry Smith ierr = MatLUFactorNumeric_SeqBAIJ(A,&C); CHKERRQ(ierr); 2545d517e7eSBarry Smith 2555d517e7eSBarry Smith /* free all the data structures from mat */ 2565d517e7eSBarry Smith PetscFree(mat->a); 2575d517e7eSBarry Smith if (!mat->singlemalloc) {PetscFree(mat->i); PetscFree(mat->j);} 2585d517e7eSBarry Smith if (mat->diag) PetscFree(mat->diag); 2595d517e7eSBarry Smith if (mat->ilen) PetscFree(mat->ilen); 2605d517e7eSBarry Smith if (mat->imax) PetscFree(mat->imax); 2615d517e7eSBarry Smith if (mat->solve_work) PetscFree(mat->solve_work); 2625d517e7eSBarry Smith PetscFree(mat); 2635d517e7eSBarry Smith 2645d517e7eSBarry Smith PetscMemcpy(A,C,sizeof(struct _Mat)); 2655d517e7eSBarry Smith PetscHeaderDestroy(C); 2665d517e7eSBarry Smith return 0; 2675d517e7eSBarry Smith } 2685d517e7eSBarry Smith /* ----------------------------------------------------------- */ 269*ec3a8b7bSBarry Smith int MatSolve_SeqBAIJ(Mat A,Vec bb, Vec xx) 2705d517e7eSBarry Smith { 271*ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data; 2725d517e7eSBarry Smith IS iscol = a->col, isrow = a->row; 2735d517e7eSBarry Smith int *r,*c, ierr, i, n = a->m, *vi, *ai = a->i, *aj = a->j; 2745d517e7eSBarry Smith int nz,shift = a->indexshift; 2755d517e7eSBarry Smith Scalar *x,*b,*tmp, *tmps, *aa = a->a, sum, *v; 2765d517e7eSBarry Smith 277*ec3a8b7bSBarry Smith if (A->factor != FACTOR_LU) SETERRQ(1,"MatSolve_SeqBAIJ:Not for unfactored matrix"); 2785d517e7eSBarry Smith 2795d517e7eSBarry Smith ierr = VecGetArray(bb,&b); CHKERRQ(ierr); 2805d517e7eSBarry Smith ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 2815d517e7eSBarry Smith tmp = a->solve_work; 2825d517e7eSBarry Smith 2835d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr); 2845d517e7eSBarry Smith ierr = ISGetIndices(iscol,&c);CHKERRQ(ierr); c = c + (n-1); 2855d517e7eSBarry Smith 2865d517e7eSBarry Smith /* forward solve the lower triangular */ 2875d517e7eSBarry Smith tmp[0] = b[*r++]; 2885d517e7eSBarry Smith tmps = tmp + shift; 2895d517e7eSBarry Smith for ( i=1; i<n; i++ ) { 2905d517e7eSBarry Smith v = aa + ai[i] + shift; 2915d517e7eSBarry Smith vi = aj + ai[i] + shift; 2925d517e7eSBarry Smith nz = a->diag[i] - ai[i]; 2935d517e7eSBarry Smith sum = b[*r++]; 2945d517e7eSBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 2955d517e7eSBarry Smith tmp[i] = sum; 2965d517e7eSBarry Smith } 2975d517e7eSBarry Smith 2985d517e7eSBarry Smith /* backward solve the upper triangular */ 2995d517e7eSBarry Smith for ( i=n-1; i>=0; i-- ){ 3005d517e7eSBarry Smith v = aa + a->diag[i] + (!shift); 3015d517e7eSBarry Smith vi = aj + a->diag[i] + (!shift); 3025d517e7eSBarry Smith nz = ai[i+1] - a->diag[i] - 1; 3035d517e7eSBarry Smith sum = tmp[i]; 3045d517e7eSBarry Smith while (nz--) sum -= *v++ * tmps[*vi++]; 3055d517e7eSBarry Smith x[*c--] = tmp[i] = sum*aa[a->diag[i]+shift]; 3065d517e7eSBarry Smith } 3075d517e7eSBarry Smith 3085d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 3095d517e7eSBarry Smith ierr = ISRestoreIndices(iscol,&c); CHKERRQ(ierr); 3105d517e7eSBarry Smith PLogFlops(2*a->nz - a->n); 3115d517e7eSBarry Smith return 0; 3125d517e7eSBarry Smith } 3135d517e7eSBarry Smith 3145d517e7eSBarry Smith /* ----------------------------------------------------------------*/ 315*ec3a8b7bSBarry Smith /* 316*ec3a8b7bSBarry Smith This code is virtually identical to MatILUFactorSymbolic_SeqAIJ 317*ec3a8b7bSBarry Smith except that the data structure of Mat_SeqAIJ is slightly different. 318*ec3a8b7bSBarry Smith Not a good example of code reuse. 319*ec3a8b7bSBarry Smith */ 320*ec3a8b7bSBarry Smith int MatILUFactorSymbolic_SeqBAIJ(Mat A,IS isrow,IS iscol,double f,int levels, 321*ec3a8b7bSBarry Smith Mat *fact) 3225d517e7eSBarry Smith { 323*ec3a8b7bSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *) A->data, *b; 3245d517e7eSBarry Smith IS isicol; 325*ec3a8b7bSBarry Smith int *r,*ic, ierr, prow, n = a->mbs, *ai = a->i, *aj = a->j; 3265d517e7eSBarry Smith int *ainew,*ajnew, jmax,*fill, *xi, nz, *im,*ajfill,*flev; 3275d517e7eSBarry Smith int *dloc, idx, row,m,fm, nzf, nzi,len, realloc = 0; 328*ec3a8b7bSBarry Smith int incrlev,nnz,i,shift = a->indexshift,bs = a->bs; 3295d517e7eSBarry Smith 330*ec3a8b7bSBarry Smith if (a->m != a->n) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Matrix must be square"); 331*ec3a8b7bSBarry Smith if (!isrow) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have row permutation"); 332*ec3a8b7bSBarry Smith if (!iscol) SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:Must have column permutation"); 3335d517e7eSBarry Smith 3345d517e7eSBarry Smith /* special case that simply copies fill pattern */ 3355d517e7eSBarry Smith if (levels == 0 && ISIsIdentity(isrow) && ISIsIdentity(iscol)) { 336*ec3a8b7bSBarry Smith ierr = MatConvertSameType_SeqBAIJ(A,fact,DO_NOT_COPY_VALUES); CHKERRQ(ierr); 3375d517e7eSBarry Smith (*fact)->factor = FACTOR_LU; 338*ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*fact)->data; 3395d517e7eSBarry Smith if (!b->diag) { 340*ec3a8b7bSBarry Smith ierr = MatMarkDiag_SeqBAIJ(*fact); CHKERRQ(ierr); 3415d517e7eSBarry Smith } 3425d517e7eSBarry Smith b->row = isrow; 3435d517e7eSBarry Smith b->col = iscol; 3445d517e7eSBarry Smith b->solve_work = (Scalar *) PetscMalloc((b->m+1)*sizeof(Scalar));CHKPTRQ(b->solve_work); 3455d517e7eSBarry Smith return 0; 3465d517e7eSBarry Smith } 3475d517e7eSBarry Smith 3485d517e7eSBarry Smith ierr = ISInvertPermutation(iscol,&isicol); CHKERRQ(ierr); 3495d517e7eSBarry Smith ierr = ISGetIndices(isrow,&r); CHKERRQ(ierr); 3505d517e7eSBarry Smith ierr = ISGetIndices(isicol,&ic); CHKERRQ(ierr); 3515d517e7eSBarry Smith 3525d517e7eSBarry Smith /* get new row pointers */ 3535d517e7eSBarry Smith ainew = (int *) PetscMalloc( (n+1)*sizeof(int) ); CHKPTRQ(ainew); 3545d517e7eSBarry Smith ainew[0] = -shift; 3555d517e7eSBarry Smith /* don't know how many column pointers are needed so estimate */ 3565d517e7eSBarry Smith jmax = (int) (f*(ai[n]+!shift)); 3575d517e7eSBarry Smith ajnew = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajnew); 3585d517e7eSBarry Smith /* ajfill is level of fill for each fill entry */ 3595d517e7eSBarry Smith ajfill = (int *) PetscMalloc( (jmax)*sizeof(int) ); CHKPTRQ(ajfill); 3605d517e7eSBarry Smith /* fill is a linked list of nonzeros in active row */ 3615d517e7eSBarry Smith fill = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(fill); 3625d517e7eSBarry Smith /* im is level for each filled value */ 3635d517e7eSBarry Smith im = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(im); 3645d517e7eSBarry Smith /* dloc is location of diagonal in factor */ 3655d517e7eSBarry Smith dloc = (int *) PetscMalloc( (n+1)*sizeof(int)); CHKPTRQ(dloc); 3665d517e7eSBarry Smith dloc[0] = 0; 3675d517e7eSBarry Smith for ( prow=0; prow<n; prow++ ) { 3685d517e7eSBarry Smith /* first copy previous fill into linked list */ 3695d517e7eSBarry Smith nzf = nz = ai[r[prow]+1] - ai[r[prow]]; 3705d517e7eSBarry Smith xi = aj + ai[r[prow]] + shift; 3715d517e7eSBarry Smith fill[n] = n; 3725d517e7eSBarry Smith while (nz--) { 3735d517e7eSBarry Smith fm = n; 3745d517e7eSBarry Smith idx = ic[*xi++ + shift]; 3755d517e7eSBarry Smith do { 3765d517e7eSBarry Smith m = fm; 3775d517e7eSBarry Smith fm = fill[m]; 3785d517e7eSBarry Smith } while (fm < idx); 3795d517e7eSBarry Smith fill[m] = idx; 3805d517e7eSBarry Smith fill[idx] = fm; 3815d517e7eSBarry Smith im[idx] = 0; 3825d517e7eSBarry Smith } 3835d517e7eSBarry Smith nzi = 0; 3845d517e7eSBarry Smith row = fill[n]; 3855d517e7eSBarry Smith while ( row < prow ) { 3865d517e7eSBarry Smith incrlev = im[row] + 1; 3875d517e7eSBarry Smith nz = dloc[row]; 3885d517e7eSBarry Smith xi = ajnew + ainew[row] + shift + nz; 3895d517e7eSBarry Smith flev = ajfill + ainew[row] + shift + nz + 1; 3905d517e7eSBarry Smith nnz = ainew[row+1] - ainew[row] - nz - 1; 3915d517e7eSBarry Smith if (*xi++ + shift != row) { 392*ec3a8b7bSBarry Smith SETERRQ(1,"MatILUFactorSymbolic_SeqBAIJ:zero pivot"); 3935d517e7eSBarry Smith } 3945d517e7eSBarry Smith fm = row; 3955d517e7eSBarry Smith while (nnz-- > 0) { 3965d517e7eSBarry Smith idx = *xi++ + shift; 3975d517e7eSBarry Smith if (*flev + incrlev > levels) { 3985d517e7eSBarry Smith flev++; 3995d517e7eSBarry Smith continue; 4005d517e7eSBarry Smith } 4015d517e7eSBarry Smith do { 4025d517e7eSBarry Smith m = fm; 4035d517e7eSBarry Smith fm = fill[m]; 4045d517e7eSBarry Smith } while (fm < idx); 4055d517e7eSBarry Smith if (fm != idx) { 4065d517e7eSBarry Smith im[idx] = *flev + incrlev; 4075d517e7eSBarry Smith fill[m] = idx; 4085d517e7eSBarry Smith fill[idx] = fm; 4095d517e7eSBarry Smith fm = idx; 4105d517e7eSBarry Smith nzf++; 4115d517e7eSBarry Smith } 4125d517e7eSBarry Smith else { 4135d517e7eSBarry Smith if (im[idx] > *flev + incrlev) im[idx] = *flev+incrlev; 4145d517e7eSBarry Smith } 4155d517e7eSBarry Smith flev++; 4165d517e7eSBarry Smith } 4175d517e7eSBarry Smith row = fill[row]; 4185d517e7eSBarry Smith nzi++; 4195d517e7eSBarry Smith } 4205d517e7eSBarry Smith /* copy new filled row into permanent storage */ 4215d517e7eSBarry Smith ainew[prow+1] = ainew[prow] + nzf; 4225d517e7eSBarry Smith if (ainew[prow+1] > jmax-shift) { 4235d517e7eSBarry Smith /* allocate a longer ajnew */ 4245d517e7eSBarry Smith int maxadd; 4255d517e7eSBarry Smith maxadd = (int) ((f*(ai[n]+!shift)*(n-prow+5))/n); 4265d517e7eSBarry Smith if (maxadd < nzf) maxadd = (n-prow)*(nzf+1); 4275d517e7eSBarry Smith jmax += maxadd; 4285d517e7eSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 4295d517e7eSBarry Smith PetscMemcpy(xi,ajnew,(ainew[prow]+shift)*sizeof(int)); 4305d517e7eSBarry Smith PetscFree(ajnew); 4315d517e7eSBarry Smith ajnew = xi; 4325d517e7eSBarry Smith /* allocate a longer ajfill */ 4335d517e7eSBarry Smith xi = (int *) PetscMalloc( jmax*sizeof(int) );CHKPTRQ(xi); 4345d517e7eSBarry Smith PetscMemcpy(xi,ajfill,(ainew[prow]+shift)*sizeof(int)); 4355d517e7eSBarry Smith PetscFree(ajfill); 4365d517e7eSBarry Smith ajfill = xi; 4375d517e7eSBarry Smith realloc++; 4385d517e7eSBarry Smith } 4395d517e7eSBarry Smith xi = ajnew + ainew[prow] + shift; 4405d517e7eSBarry Smith flev = ajfill + ainew[prow] + shift; 4415d517e7eSBarry Smith dloc[prow] = nzi; 4425d517e7eSBarry Smith fm = fill[n]; 4435d517e7eSBarry Smith while (nzf--) { 4445d517e7eSBarry Smith *xi++ = fm - shift; 4455d517e7eSBarry Smith *flev++ = im[fm]; 4465d517e7eSBarry Smith fm = fill[fm]; 4475d517e7eSBarry Smith } 4485d517e7eSBarry Smith } 4495d517e7eSBarry Smith PetscFree(ajfill); 4505d517e7eSBarry Smith ierr = ISRestoreIndices(isrow,&r); CHKERRQ(ierr); 4515d517e7eSBarry Smith ierr = ISRestoreIndices(isicol,&ic); CHKERRQ(ierr); 4525d517e7eSBarry Smith ierr = ISDestroy(isicol); CHKERRQ(ierr); 4535d517e7eSBarry Smith PetscFree(fill); PetscFree(im); 4545d517e7eSBarry Smith 4555d517e7eSBarry Smith PLogInfo((PetscObject)A, 456*ec3a8b7bSBarry Smith "Info:MatILUFactorSymbolic_SeqBAIJ:Realloc %d Fill ratio:given %g needed %g\n", 4575d517e7eSBarry Smith realloc,f,((double)ainew[n])/((double)ai[prow])); 4585d517e7eSBarry Smith 4595d517e7eSBarry Smith /* put together the new matrix */ 460*ec3a8b7bSBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,bs*n,bs*n,0,PETSC_NULL,fact);CHKERRQ(ierr); 461*ec3a8b7bSBarry Smith b = (Mat_SeqBAIJ *) (*fact)->data; 4625d517e7eSBarry Smith PetscFree(b->imax); 4635d517e7eSBarry Smith b->singlemalloc = 0; 4645d517e7eSBarry Smith len = (ainew[n] + shift)*sizeof(Scalar); 4655d517e7eSBarry Smith /* the next line frees the default space generated by the Create() */ 4665d517e7eSBarry Smith PetscFree(b->a); PetscFree(b->ilen); 4675d517e7eSBarry Smith b->a = (Scalar *) PetscMalloc( len ); CHKPTRQ(b->a); 4685d517e7eSBarry Smith b->j = ajnew; 4695d517e7eSBarry Smith b->i = ainew; 4705d517e7eSBarry Smith for ( i=0; i<n; i++ ) dloc[i] += ainew[i]; 4715d517e7eSBarry Smith b->diag = dloc; 4725d517e7eSBarry Smith b->ilen = 0; 4735d517e7eSBarry Smith b->imax = 0; 4745d517e7eSBarry Smith b->row = isrow; 4755d517e7eSBarry Smith b->col = iscol; 4765d517e7eSBarry Smith b->solve_work = (Scalar *) PetscMalloc( (n+1)*sizeof(Scalar)); 4775d517e7eSBarry Smith CHKPTRQ(b->solve_work); 4785d517e7eSBarry Smith /* In b structure: Free imax, ilen, old a, old j. 4795d517e7eSBarry Smith Allocate dloc, solve_work, new a, new j */ 4805d517e7eSBarry Smith PLogObjectMemory(*fact,(ainew[n]+shift-n) * (sizeof(int)+sizeof(Scalar))); 4815d517e7eSBarry Smith b->maxnz = b->nz = ainew[n] + shift; 4825d517e7eSBarry Smith (*fact)->factor = FACTOR_LU; 4835d517e7eSBarry Smith return 0; 4845d517e7eSBarry Smith } 4855d517e7eSBarry Smith 4865d517e7eSBarry Smith 4875d517e7eSBarry Smith 4885d517e7eSBarry Smith 489