1bba1ac68SSatish Balay /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 22593348eSBarry Smith 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 91a6a6d98SBarry Smith #include "src/inline/spops.h" 10325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 113b547af2SSatish Balay 12*af674e45SBarry Smith /* 13*af674e45SBarry Smith Special version for Fun3d sequential benchmark 14*af674e45SBarry Smith */ 15*af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 16*af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17*af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18*af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 19*af674e45SBarry Smith #endif 20*af674e45SBarry Smith 21*af674e45SBarry Smith #undef __FUNCT__ 22*af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 23*af674e45SBarry Smith void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,MatScalar *v,InsertMode *is,int *err) 24*af674e45SBarry Smith { 25*af674e45SBarry Smith Mat A = *AA; 26*af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27*af674e45SBarry Smith int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,m = *mm,n = *nn; 28*af674e45SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 29*af674e45SBarry Smith int *aj=a->j,nonew=a->nonew,stepval,ierr; 30*af674e45SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 31*af674e45SBarry Smith 32*af674e45SBarry Smith PetscFunctionBegin; 33*af674e45SBarry Smith stepval = (n-1)*4; 34*af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 35*af674e45SBarry Smith row = im[k]; 36*af674e45SBarry Smith rp = aj + ai[row]; 37*af674e45SBarry Smith ap = aa + 16*ai[row]; 38*af674e45SBarry Smith rmax = imax[row]; 39*af674e45SBarry Smith nrow = ailen[row]; 40*af674e45SBarry Smith low = 0; 41*af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 42*af674e45SBarry Smith col = in[l]; 43*af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 44*af674e45SBarry Smith low = 0; high = nrow; 45*af674e45SBarry Smith while (high-low > 7) { 46*af674e45SBarry Smith t = (low+high)/2; 47*af674e45SBarry Smith if (rp[t] > col) high = t; 48*af674e45SBarry Smith else low = t; 49*af674e45SBarry Smith } 50*af674e45SBarry Smith for (i=low; i<high; i++) { 51*af674e45SBarry Smith if (rp[i] > col) break; 52*af674e45SBarry Smith if (rp[i] == col) { 53*af674e45SBarry Smith bap = ap + 16*i; 54*af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 55*af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 56*af674e45SBarry Smith bap[jj] += *value++; 57*af674e45SBarry Smith } 58*af674e45SBarry Smith } 59*af674e45SBarry Smith goto noinsert2; 60*af674e45SBarry Smith } 61*af674e45SBarry Smith } 62*af674e45SBarry Smith N = nrow++ - 1; 63*af674e45SBarry Smith /* shift up all the later entries in this row */ 64*af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 65*af674e45SBarry Smith rp[ii+1] = rp[ii]; 66*af674e45SBarry Smith ierr = PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67*af674e45SBarry Smith } 68*af674e45SBarry Smith if (N >= i) { 69*af674e45SBarry Smith ierr = PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70*af674e45SBarry Smith } 71*af674e45SBarry Smith rp[i] = col; 72*af674e45SBarry Smith bap = ap + 16*i; 73*af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 74*af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 75*af674e45SBarry Smith bap[jj] = *value++; 76*af674e45SBarry Smith } 77*af674e45SBarry Smith } 78*af674e45SBarry Smith noinsert2:; 79*af674e45SBarry Smith low = i; 80*af674e45SBarry Smith } 81*af674e45SBarry Smith ailen[row] = nrow; 82*af674e45SBarry Smith } 83*af674e45SBarry Smith } 84*af674e45SBarry Smith 85*af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 86*af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 87*af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88*af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 89*af674e45SBarry Smith #endif 90*af674e45SBarry Smith 91*af674e45SBarry Smith #undef __FUNCT__ 92*af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 93*af674e45SBarry Smith int matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v,InsertMode *is,int *err) 94*af674e45SBarry Smith { 95*af674e45SBarry Smith Mat A = *AA; 96*af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97*af674e45SBarry Smith int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted,n = *nn,m = *mm; 98*af674e45SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 99*af674e45SBarry Smith int *aj=a->j,brow,bcol; 100*af674e45SBarry Smith int ridx,cidx,ierr; 101*af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 102*af674e45SBarry Smith 103*af674e45SBarry Smith PetscFunctionBegin; 104*af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 105*af674e45SBarry Smith row = im[k]; brow = row/4; 106*af674e45SBarry Smith rp = aj + ai[brow]; 107*af674e45SBarry Smith ap = aa + 16*ai[brow]; 108*af674e45SBarry Smith rmax = imax[brow]; 109*af674e45SBarry Smith nrow = ailen[brow]; 110*af674e45SBarry Smith low = 0; 111*af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 112*af674e45SBarry Smith col = in[l]; bcol = col/4; 113*af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 114*af674e45SBarry Smith value = v[l + k*n]; 115*af674e45SBarry Smith low = 0; high = nrow; 116*af674e45SBarry Smith while (high-low > 7) { 117*af674e45SBarry Smith t = (low+high)/2; 118*af674e45SBarry Smith if (rp[t] > bcol) high = t; 119*af674e45SBarry Smith else low = t; 120*af674e45SBarry Smith } 121*af674e45SBarry Smith for (i=low; i<high; i++) { 122*af674e45SBarry Smith if (rp[i] > bcol) break; 123*af674e45SBarry Smith if (rp[i] == bcol) { 124*af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 125*af674e45SBarry Smith *bap += value; 126*af674e45SBarry Smith goto noinsert1; 127*af674e45SBarry Smith } 128*af674e45SBarry Smith } 129*af674e45SBarry Smith N = nrow++ - 1; 130*af674e45SBarry Smith /* shift up all the later entries in this row */ 131*af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 132*af674e45SBarry Smith rp[ii+1] = rp[ii]; 133*af674e45SBarry Smith ierr = PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));CHKERRQ(ierr); 134*af674e45SBarry Smith } 135*af674e45SBarry Smith if (N>=i) { 136*af674e45SBarry Smith ierr = PetscMemzero(ap+16*i,16*sizeof(MatScalar));CHKERRQ(ierr); 137*af674e45SBarry Smith } 138*af674e45SBarry Smith rp[i] = bcol; 139*af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 140*af674e45SBarry Smith noinsert1:; 141*af674e45SBarry Smith low = i; 142*af674e45SBarry Smith } 143*af674e45SBarry Smith ailen[brow] = nrow; 144*af674e45SBarry Smith } 145*af674e45SBarry Smith } 146*af674e45SBarry Smith 14795d5f7c2SBarry Smith /* UGLY, ugly, ugly 14887828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1493477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 15095d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 15195d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 15295d5f7c2SBarry Smith into the single precision data structures. 15395d5f7c2SBarry Smith */ 15495d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 155ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 15695d5f7c2SBarry Smith #else 15795d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 15895d5f7c2SBarry Smith #endif 15904929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 16004929863SHong Zhang EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 16104929863SHong Zhang #endif 16295d5f7c2SBarry Smith 1632d61bbb3SSatish Balay #define CHUNKSIZE 10 164de6a44a3SBarry Smith 165be5855fcSBarry Smith /* 166be5855fcSBarry Smith Checks for missing diagonals 167be5855fcSBarry Smith */ 1684a2ae208SSatish Balay #undef __FUNCT__ 1694a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 170c4992f7dSBarry Smith int MatMissingDiagonal_SeqBAIJ(Mat A) 171be5855fcSBarry Smith { 172be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 173883fce79SBarry Smith int *diag,*jj = a->j,i,ierr; 174be5855fcSBarry Smith 175be5855fcSBarry Smith PetscFunctionBegin; 176c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 177883fce79SBarry Smith diag = a->diag; 1780e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 179be5855fcSBarry Smith if (jj[diag[i]] != i) { 18029bbc08cSBarry Smith SETERRQ1(1,"Matrix is missing diagonal number %d",i); 181be5855fcSBarry Smith } 182be5855fcSBarry Smith } 183be5855fcSBarry Smith PetscFunctionReturn(0); 184be5855fcSBarry Smith } 185be5855fcSBarry Smith 1864a2ae208SSatish Balay #undef __FUNCT__ 1874a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 188c4992f7dSBarry Smith int MatMarkDiagonal_SeqBAIJ(Mat A) 189de6a44a3SBarry Smith { 190de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 19182502324SSatish Balay int i,j,*diag,m = a->mbs,ierr; 192de6a44a3SBarry Smith 1933a40ed3dSBarry Smith PetscFunctionBegin; 194883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 195883fce79SBarry Smith 196b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 197b0a32e0cSBarry Smith PetscLogObjectMemory(A,(m+1)*sizeof(int)); 1987fc0212eSBarry Smith for (i=0; i<m; i++) { 199ecc615b2SBarry Smith diag[i] = a->i[i+1]; 200de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 201de6a44a3SBarry Smith if (a->j[j] == i) { 202de6a44a3SBarry Smith diag[i] = j; 203de6a44a3SBarry Smith break; 204de6a44a3SBarry Smith } 205de6a44a3SBarry Smith } 206de6a44a3SBarry Smith } 207de6a44a3SBarry Smith a->diag = diag; 2083a40ed3dSBarry Smith PetscFunctionReturn(0); 209de6a44a3SBarry Smith } 2102593348eSBarry Smith 2112593348eSBarry Smith 212ca44d042SBarry Smith EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 2133b2fbd54SBarry Smith 2144a2ae208SSatish Balay #undef __FUNCT__ 2154a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 2160e6d2581SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2173b2fbd54SBarry Smith { 2183b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2193b2fbd54SBarry Smith int ierr,n = a->mbs,i; 2203b2fbd54SBarry Smith 2213a40ed3dSBarry Smith PetscFunctionBegin; 2223b2fbd54SBarry Smith *nn = n; 2233a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2243b2fbd54SBarry Smith if (symmetric) { 2253b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 2263b2fbd54SBarry Smith } else if (oshift == 1) { 2273b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 228435da068SBarry Smith int nz = a->i[n]; 2293b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 2303b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 2313b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2323b2fbd54SBarry Smith } else { 2333b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2343b2fbd54SBarry Smith } 2353b2fbd54SBarry Smith 2363a40ed3dSBarry Smith PetscFunctionReturn(0); 2373b2fbd54SBarry Smith } 2383b2fbd54SBarry Smith 2394a2ae208SSatish Balay #undef __FUNCT__ 2404a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 241435da068SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2423b2fbd54SBarry Smith { 2433b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 244606d414cSSatish Balay int i,n = a->mbs,ierr; 2453b2fbd54SBarry Smith 2463a40ed3dSBarry Smith PetscFunctionBegin; 2473a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2483b2fbd54SBarry Smith if (symmetric) { 249606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 250606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 251af5da2bfSBarry Smith } else if (oshift == 1) { 252435da068SBarry Smith int nz = a->i[n]-1; 2533b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 2543b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 2553b2fbd54SBarry Smith } 2563a40ed3dSBarry Smith PetscFunctionReturn(0); 2573b2fbd54SBarry Smith } 2583b2fbd54SBarry Smith 2594a2ae208SSatish Balay #undef __FUNCT__ 2604a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 2612d61bbb3SSatish Balay int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 2622d61bbb3SSatish Balay { 2632d61bbb3SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2642d61bbb3SSatish Balay 2652d61bbb3SSatish Balay PetscFunctionBegin; 2662d61bbb3SSatish Balay *bs = baij->bs; 2672d61bbb3SSatish Balay PetscFunctionReturn(0); 2682d61bbb3SSatish Balay } 2692d61bbb3SSatish Balay 2702d61bbb3SSatish Balay 2714a2ae208SSatish Balay #undef __FUNCT__ 2724a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 273e1311b90SBarry Smith int MatDestroy_SeqBAIJ(Mat A) 2742d61bbb3SSatish Balay { 2752d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 276e51c0b9cSSatish Balay int ierr; 2772d61bbb3SSatish Balay 278433994e6SBarry Smith PetscFunctionBegin; 279aa482453SBarry Smith #if defined(PETSC_USE_LOG) 280b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 2812d61bbb3SSatish Balay #endif 282606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 283606d414cSSatish Balay if (!a->singlemalloc) { 284606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 285606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 286606d414cSSatish Balay } 287c38d4ed2SBarry Smith if (a->row) { 288c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 289c38d4ed2SBarry Smith } 290c38d4ed2SBarry Smith if (a->col) { 291c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 292c38d4ed2SBarry Smith } 293606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 294606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 295606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 296606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 297606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 298e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 299606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 300563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 301563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 302563b5814SBarry Smith #endif 303606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 3042d61bbb3SSatish Balay PetscFunctionReturn(0); 3052d61bbb3SSatish Balay } 3062d61bbb3SSatish Balay 3074a2ae208SSatish Balay #undef __FUNCT__ 3084a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 3092d61bbb3SSatish Balay int MatSetOption_SeqBAIJ(Mat A,MatOption op) 3102d61bbb3SSatish Balay { 3112d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3122d61bbb3SSatish Balay 3132d61bbb3SSatish Balay PetscFunctionBegin; 314aa275fccSKris Buschelman switch (op) { 315aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 316aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 317aa275fccSKris Buschelman break; 318aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 319aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 320aa275fccSKris Buschelman break; 321aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 322aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 323aa275fccSKris Buschelman break; 324aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 325aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 326aa275fccSKris Buschelman break; 327aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 328aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 329aa275fccSKris Buschelman break; 330aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 331aa275fccSKris Buschelman a->nonew = 1; 332aa275fccSKris Buschelman break; 333aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 334aa275fccSKris Buschelman a->nonew = -1; 335aa275fccSKris Buschelman break; 336aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 337aa275fccSKris Buschelman a->nonew = -2; 338aa275fccSKris Buschelman break; 339aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 340aa275fccSKris Buschelman a->nonew = 0; 341aa275fccSKris Buschelman break; 342aa275fccSKris Buschelman case MAT_ROWS_SORTED: 343aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 344aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 345aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 346aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 347b0a32e0cSBarry Smith PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 348aa275fccSKris Buschelman break; 349aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 35029bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 351bd648c37SKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 352bd648c37SKris Buschelman if (a->bs==4) { 35394ee7fc8SKris Buschelman PetscTruth sse_enabled_local,sse_enabled_global; 354bd648c37SKris Buschelman int ierr; 35594ee7fc8SKris Buschelman 35694ee7fc8SKris Buschelman sse_enabled_local = PETSC_FALSE; 35794ee7fc8SKris Buschelman sse_enabled_global = PETSC_FALSE; 35894ee7fc8SKris Buschelman 35994ee7fc8SKris Buschelman ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 360e64df4b9SKris Buschelman #if defined(PETSC_HAVE_SSE) 36194ee7fc8SKris Buschelman if (sse_enabled_local) { 36254138f6bSKris Buschelman a->single_precision_solves = PETSC_TRUE; 36354138f6bSKris Buschelman A->ops->solve = MatSolve_SeqBAIJ_Update; 36454138f6bSKris Buschelman A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 365cf242676SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 36654138f6bSKris Buschelman break; 36794ee7fc8SKris Buschelman } else { 36894ee7fc8SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 3698661488fSKris Buschelman } 370e64df4b9SKris Buschelman #else 371bd648c37SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 372e64df4b9SKris Buschelman #endif 373bd648c37SKris Buschelman } 374bd648c37SKris Buschelman break; 375aa275fccSKris Buschelman default: 37629bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 3772d61bbb3SSatish Balay } 3782d61bbb3SSatish Balay PetscFunctionReturn(0); 3792d61bbb3SSatish Balay } 3802d61bbb3SSatish Balay 3814a2ae208SSatish Balay #undef __FUNCT__ 3824a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 38387828ca2SBarry Smith int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 3842d61bbb3SSatish Balay { 3852d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38682502324SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 3873f1db9ecSBarry Smith MatScalar *aa,*aa_i; 38887828ca2SBarry Smith PetscScalar *v_i; 3892d61bbb3SSatish Balay 3902d61bbb3SSatish Balay PetscFunctionBegin; 3912d61bbb3SSatish Balay bs = a->bs; 3922d61bbb3SSatish Balay ai = a->i; 3932d61bbb3SSatish Balay aj = a->j; 3942d61bbb3SSatish Balay aa = a->a; 3952d61bbb3SSatish Balay bs2 = a->bs2; 3962d61bbb3SSatish Balay 397273d9f13SBarry Smith if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 3982d61bbb3SSatish Balay 3992d61bbb3SSatish Balay bn = row/bs; /* Block number */ 4002d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 4012d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 4022d61bbb3SSatish Balay *nz = bs*M; 4032d61bbb3SSatish Balay 4042d61bbb3SSatish Balay if (v) { 4052d61bbb3SSatish Balay *v = 0; 4062d61bbb3SSatish Balay if (*nz) { 40787828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 4082d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4092d61bbb3SSatish Balay v_i = *v + i*bs; 4102d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 4112d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 4122d61bbb3SSatish Balay } 4132d61bbb3SSatish Balay } 4142d61bbb3SSatish Balay } 4152d61bbb3SSatish Balay 4162d61bbb3SSatish Balay if (idx) { 4172d61bbb3SSatish Balay *idx = 0; 4182d61bbb3SSatish Balay if (*nz) { 419b0a32e0cSBarry Smith ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 4202d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4212d61bbb3SSatish Balay idx_i = *idx + i*bs; 4222d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 4232d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 4242d61bbb3SSatish Balay } 4252d61bbb3SSatish Balay } 4262d61bbb3SSatish Balay } 4272d61bbb3SSatish Balay PetscFunctionReturn(0); 4282d61bbb3SSatish Balay } 4292d61bbb3SSatish Balay 4304a2ae208SSatish Balay #undef __FUNCT__ 4314a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 43287828ca2SBarry Smith int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 4332d61bbb3SSatish Balay { 434606d414cSSatish Balay int ierr; 435606d414cSSatish Balay 4362d61bbb3SSatish Balay PetscFunctionBegin; 437606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 438606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 4392d61bbb3SSatish Balay PetscFunctionReturn(0); 4402d61bbb3SSatish Balay } 4412d61bbb3SSatish Balay 4424a2ae208SSatish Balay #undef __FUNCT__ 4434a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 4442d61bbb3SSatish Balay int MatTranspose_SeqBAIJ(Mat A,Mat *B) 4452d61bbb3SSatish Balay { 4462d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4472d61bbb3SSatish Balay Mat C; 4482d61bbb3SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 449273d9f13SBarry Smith int *rows,*cols,bs2=a->bs2; 45087828ca2SBarry Smith PetscScalar *array; 4512d61bbb3SSatish Balay 4522d61bbb3SSatish Balay PetscFunctionBegin; 45329bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 454b0a32e0cSBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 455549d3d68SSatish Balay ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 4562d61bbb3SSatish Balay 457375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 45887828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 45987828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 460375fe846SBarry Smith #else 4613eda8832SBarry Smith array = a->a; 462375fe846SBarry Smith #endif 463375fe846SBarry Smith 4642d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 465273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 466606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 467b0a32e0cSBarry Smith ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 4682d61bbb3SSatish Balay cols = rows + bs; 4692d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4702d61bbb3SSatish Balay cols[0] = i*bs; 4712d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 4722d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 4732d61bbb3SSatish Balay for (j=0; j<len; j++) { 4742d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 4752d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 4762d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 4772d61bbb3SSatish Balay array += bs2; 4782d61bbb3SSatish Balay } 4792d61bbb3SSatish Balay } 480606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 481375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 482375fe846SBarry Smith ierr = PetscFree(array); 483375fe846SBarry Smith #endif 4842d61bbb3SSatish Balay 4852d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4862d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4872d61bbb3SSatish Balay 488c4992f7dSBarry Smith if (B) { 4892d61bbb3SSatish Balay *B = C; 4902d61bbb3SSatish Balay } else { 491273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 4922d61bbb3SSatish Balay } 4932d61bbb3SSatish Balay PetscFunctionReturn(0); 4942d61bbb3SSatish Balay } 4952d61bbb3SSatish Balay 4964a2ae208SSatish Balay #undef __FUNCT__ 4974a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 498b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 4992593348eSBarry Smith { 500b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 5019df24120SSatish Balay int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 50287828ca2SBarry Smith PetscScalar *aa; 503ce6f0cecSBarry Smith FILE *file; 5042593348eSBarry Smith 5053a40ed3dSBarry Smith PetscFunctionBegin; 506b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 507b0a32e0cSBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 508552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 5093b2fbd54SBarry Smith 510273d9f13SBarry Smith col_lens[1] = A->m; 511273d9f13SBarry Smith col_lens[2] = A->n; 5127e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 5132593348eSBarry Smith 5142593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 515b6490206SBarry Smith count = 0; 516b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 517b6490206SBarry Smith for (j=0; j<bs; j++) { 518b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 519b6490206SBarry Smith } 5202593348eSBarry Smith } 521273d9f13SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 522606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 5232593348eSBarry Smith 5242593348eSBarry Smith /* store column indices (zero start index) */ 525b0a32e0cSBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 526b6490206SBarry Smith count = 0; 527b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 528b6490206SBarry Smith for (j=0; j<bs; j++) { 529b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 530b6490206SBarry Smith for (l=0; l<bs; l++) { 531b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 5322593348eSBarry Smith } 5332593348eSBarry Smith } 534b6490206SBarry Smith } 535b6490206SBarry Smith } 5360752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 537606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 5382593348eSBarry Smith 5392593348eSBarry Smith /* store nonzero values */ 54087828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 541b6490206SBarry Smith count = 0; 542b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 543b6490206SBarry Smith for (j=0; j<bs; j++) { 544b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 545b6490206SBarry Smith for (l=0; l<bs; l++) { 5467e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 547b6490206SBarry Smith } 548b6490206SBarry Smith } 549b6490206SBarry Smith } 550b6490206SBarry Smith } 5510752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 552606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 553ce6f0cecSBarry Smith 554b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 555ce6f0cecSBarry Smith if (file) { 556ce6f0cecSBarry Smith fprintf(file,"-matload_block_size %d\n",a->bs); 557ce6f0cecSBarry Smith } 5583a40ed3dSBarry Smith PetscFunctionReturn(0); 5592593348eSBarry Smith } 5602593348eSBarry Smith 56104929863SHong Zhang extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 56204929863SHong Zhang 5634a2ae208SSatish Balay #undef __FUNCT__ 5644a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 565b0a32e0cSBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 5662593348eSBarry Smith { 567b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 568fb9695e5SSatish Balay int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 569f3ef73ceSBarry Smith PetscViewerFormat format; 5702593348eSBarry Smith 5713a40ed3dSBarry Smith PetscFunctionBegin; 572b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 573fb9695e5SSatish Balay if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 574b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 575fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 576bcd9e38bSBarry Smith Mat aij; 577bcd9e38bSBarry Smith ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 578bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 579bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 58004929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 58104929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 58204929863SHong Zhang ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 58304929863SHong Zhang #endif 58404929863SHong Zhang PetscFunctionReturn(0); 585fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 586b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 58744cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 58844cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 589b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 59044cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 59144cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 592aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 5930e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59462b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 5950e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5960e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59762b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 5980e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5990e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 60062b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6010ef38995SBarry Smith } 60244cd7ae7SLois Curfman McInnes #else 6030ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 60462b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 6050ef38995SBarry Smith } 60644cd7ae7SLois Curfman McInnes #endif 60744cd7ae7SLois Curfman McInnes } 60844cd7ae7SLois Curfman McInnes } 609b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 61044cd7ae7SLois Curfman McInnes } 61144cd7ae7SLois Curfman McInnes } 612b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 6130ef38995SBarry Smith } else { 614b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 615b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 616b6490206SBarry Smith for (j=0; j<bs; j++) { 617b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 618b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 619b6490206SBarry Smith for (l=0; l<bs; l++) { 620aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 6210e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 62262b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 6230e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6240e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 62562b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 6260e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6270ef38995SBarry Smith } else { 62862b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 62988685aaeSLois Curfman McInnes } 63088685aaeSLois Curfman McInnes #else 63162b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 63288685aaeSLois Curfman McInnes #endif 6332593348eSBarry Smith } 6342593348eSBarry Smith } 635b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 6362593348eSBarry Smith } 6372593348eSBarry Smith } 638b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 639b6490206SBarry Smith } 640b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6413a40ed3dSBarry Smith PetscFunctionReturn(0); 6422593348eSBarry Smith } 6432593348eSBarry Smith 6444a2ae208SSatish Balay #undef __FUNCT__ 6454a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 646b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 6473270192aSSatish Balay { 64877ed5343SBarry Smith Mat A = (Mat) Aa; 6493270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 650b65db4caSBarry Smith int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 6510e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 6523f1db9ecSBarry Smith MatScalar *aa; 653b0a32e0cSBarry Smith PetscViewer viewer; 6543270192aSSatish Balay 6553a40ed3dSBarry Smith PetscFunctionBegin; 6563270192aSSatish Balay 657b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 65877ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 65977ed5343SBarry Smith 660b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 66177ed5343SBarry Smith 6623270192aSSatish Balay /* loop over matrix elements drawing boxes */ 663b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 6643270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6653270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 666273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6673270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6683270192aSSatish Balay aa = a->a + j*bs2; 6693270192aSSatish Balay for (k=0; k<bs; k++) { 6703270192aSSatish Balay for (l=0; l<bs; l++) { 6710e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 672b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6733270192aSSatish Balay } 6743270192aSSatish Balay } 6753270192aSSatish Balay } 6763270192aSSatish Balay } 677b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 6783270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6793270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 680273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6813270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6823270192aSSatish Balay aa = a->a + j*bs2; 6833270192aSSatish Balay for (k=0; k<bs; k++) { 6843270192aSSatish Balay for (l=0; l<bs; l++) { 6850e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 686b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6873270192aSSatish Balay } 6883270192aSSatish Balay } 6893270192aSSatish Balay } 6903270192aSSatish Balay } 6913270192aSSatish Balay 692b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 6933270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6943270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 695273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6963270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6973270192aSSatish Balay aa = a->a + j*bs2; 6983270192aSSatish Balay for (k=0; k<bs; k++) { 6993270192aSSatish Balay for (l=0; l<bs; l++) { 7000e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 701b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 7023270192aSSatish Balay } 7033270192aSSatish Balay } 7043270192aSSatish Balay } 7053270192aSSatish Balay } 70677ed5343SBarry Smith PetscFunctionReturn(0); 70777ed5343SBarry Smith } 7083270192aSSatish Balay 7094a2ae208SSatish Balay #undef __FUNCT__ 7104a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 711b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 71277ed5343SBarry Smith { 7137dce120fSSatish Balay int ierr; 7140e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 715b0a32e0cSBarry Smith PetscDraw draw; 71677ed5343SBarry Smith PetscTruth isnull; 7173270192aSSatish Balay 71877ed5343SBarry Smith PetscFunctionBegin; 71977ed5343SBarry Smith 720b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 721b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 72277ed5343SBarry Smith 72377ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 724273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 72577ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 726b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 727b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 72877ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 7293a40ed3dSBarry Smith PetscFunctionReturn(0); 7303270192aSSatish Balay } 7313270192aSSatish Balay 7324a2ae208SSatish Balay #undef __FUNCT__ 7334a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 734b0a32e0cSBarry Smith int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 7352593348eSBarry Smith { 73619bcc07fSBarry Smith int ierr; 7376831982aSBarry Smith PetscTruth issocket,isascii,isbinary,isdraw; 7382593348eSBarry Smith 7393a40ed3dSBarry Smith PetscFunctionBegin; 740b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 741b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 742fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 743fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 7440f5bd95cSBarry Smith if (issocket) { 74529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 7460f5bd95cSBarry Smith } else if (isascii){ 7473a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 7480f5bd95cSBarry Smith } else if (isbinary) { 7493a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 7500f5bd95cSBarry Smith } else if (isdraw) { 7513a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 7525cd90555SBarry Smith } else { 75329bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 7542593348eSBarry Smith } 7553a40ed3dSBarry Smith PetscFunctionReturn(0); 7562593348eSBarry Smith } 757b6490206SBarry Smith 758cd0e1443SSatish Balay 7594a2ae208SSatish Balay #undef __FUNCT__ 7604a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 76187828ca2SBarry Smith int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 762cd0e1443SSatish Balay { 763cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7642d61bbb3SSatish Balay int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 7652d61bbb3SSatish Balay int *ai = a->i,*ailen = a->ilen; 7662d61bbb3SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 7673f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 768cd0e1443SSatish Balay 7693a40ed3dSBarry Smith PetscFunctionBegin; 7702d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 771cd0e1443SSatish Balay row = im[k]; brow = row/bs; 77229bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 773273d9f13SBarry Smith if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 7742d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 7752c3acbe9SBarry Smith nrow = ailen[brow]; 7762d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 77729bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 778273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 7792d61bbb3SSatish Balay col = in[l] ; 7802d61bbb3SSatish Balay bcol = col/bs; 7812d61bbb3SSatish Balay cidx = col%bs; 7822d61bbb3SSatish Balay ridx = row%bs; 7832d61bbb3SSatish Balay high = nrow; 7842d61bbb3SSatish Balay low = 0; /* assume unsorted */ 7852d61bbb3SSatish Balay while (high-low > 5) { 786cd0e1443SSatish Balay t = (low+high)/2; 787cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 788cd0e1443SSatish Balay else low = t; 789cd0e1443SSatish Balay } 790cd0e1443SSatish Balay for (i=low; i<high; i++) { 791cd0e1443SSatish Balay if (rp[i] > bcol) break; 792cd0e1443SSatish Balay if (rp[i] == bcol) { 7932d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 7942d61bbb3SSatish Balay goto finished; 795cd0e1443SSatish Balay } 796cd0e1443SSatish Balay } 7972d61bbb3SSatish Balay *v++ = zero; 7982d61bbb3SSatish Balay finished:; 799cd0e1443SSatish Balay } 800cd0e1443SSatish Balay } 8013a40ed3dSBarry Smith PetscFunctionReturn(0); 802cd0e1443SSatish Balay } 803cd0e1443SSatish Balay 80495d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 8054a2ae208SSatish Balay #undef __FUNCT__ 8064a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 80787828ca2SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 80895d5f7c2SBarry Smith { 809563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 810563b5814SBarry Smith int ierr,i,N = m*n*b->bs2; 811563b5814SBarry Smith MatScalar *vsingle; 81295d5f7c2SBarry Smith 81395d5f7c2SBarry Smith PetscFunctionBegin; 814563b5814SBarry Smith if (N > b->setvalueslen) { 815563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 816b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 817563b5814SBarry Smith b->setvalueslen = N; 818563b5814SBarry Smith } 819563b5814SBarry Smith vsingle = b->setvaluescopy; 82095d5f7c2SBarry Smith for (i=0; i<N; i++) { 82195d5f7c2SBarry Smith vsingle[i] = v[i]; 82295d5f7c2SBarry Smith } 82395d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 82495d5f7c2SBarry Smith PetscFunctionReturn(0); 82595d5f7c2SBarry Smith } 82695d5f7c2SBarry Smith #endif 82795d5f7c2SBarry Smith 8282d61bbb3SSatish Balay 8294a2ae208SSatish Balay #undef __FUNCT__ 8304a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 83195d5f7c2SBarry Smith int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 83292c4ed94SBarry Smith { 83392c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8348a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 835273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 836549d3d68SSatish Balay int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 837273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 83895d5f7c2SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 83992c4ed94SBarry Smith 8403a40ed3dSBarry Smith PetscFunctionBegin; 8410e324ae4SSatish Balay if (roworiented) { 8420e324ae4SSatish Balay stepval = (n-1)*bs; 8430e324ae4SSatish Balay } else { 8440e324ae4SSatish Balay stepval = (m-1)*bs; 8450e324ae4SSatish Balay } 84692c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 84792c4ed94SBarry Smith row = im[k]; 8485ef9f2a5SBarry Smith if (row < 0) continue; 849aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 85029bbc08cSBarry Smith if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 85192c4ed94SBarry Smith #endif 85292c4ed94SBarry Smith rp = aj + ai[row]; 85392c4ed94SBarry Smith ap = aa + bs2*ai[row]; 85492c4ed94SBarry Smith rmax = imax[row]; 85592c4ed94SBarry Smith nrow = ailen[row]; 85692c4ed94SBarry Smith low = 0; 85792c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 8585ef9f2a5SBarry Smith if (in[l] < 0) continue; 859aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 86029bbc08cSBarry Smith if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 86192c4ed94SBarry Smith #endif 86292c4ed94SBarry Smith col = in[l]; 86392c4ed94SBarry Smith if (roworiented) { 8640e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 8650e324ae4SSatish Balay } else { 8660e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 86792c4ed94SBarry Smith } 86892c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 86992c4ed94SBarry Smith while (high-low > 7) { 87092c4ed94SBarry Smith t = (low+high)/2; 87192c4ed94SBarry Smith if (rp[t] > col) high = t; 87292c4ed94SBarry Smith else low = t; 87392c4ed94SBarry Smith } 87492c4ed94SBarry Smith for (i=low; i<high; i++) { 87592c4ed94SBarry Smith if (rp[i] > col) break; 87692c4ed94SBarry Smith if (rp[i] == col) { 8778a84c255SSatish Balay bap = ap + bs2*i; 8780e324ae4SSatish Balay if (roworiented) { 8798a84c255SSatish Balay if (is == ADD_VALUES) { 880dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 881dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8828a84c255SSatish Balay bap[jj] += *value++; 883dd9472c6SBarry Smith } 884dd9472c6SBarry Smith } 8850e324ae4SSatish Balay } else { 886dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 887dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8880e324ae4SSatish Balay bap[jj] = *value++; 8898a84c255SSatish Balay } 890dd9472c6SBarry Smith } 891dd9472c6SBarry Smith } 8920e324ae4SSatish Balay } else { 8930e324ae4SSatish Balay if (is == ADD_VALUES) { 894dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 895dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8960e324ae4SSatish Balay *bap++ += *value++; 897dd9472c6SBarry Smith } 898dd9472c6SBarry Smith } 8990e324ae4SSatish Balay } else { 900dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 901dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9020e324ae4SSatish Balay *bap++ = *value++; 9030e324ae4SSatish Balay } 9048a84c255SSatish Balay } 905dd9472c6SBarry Smith } 906dd9472c6SBarry Smith } 907f1241b54SBarry Smith goto noinsert2; 90892c4ed94SBarry Smith } 90992c4ed94SBarry Smith } 91089280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 91129bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91292c4ed94SBarry Smith if (nrow >= rmax) { 91392c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 91492c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 9153f1db9ecSBarry Smith MatScalar *new_a; 91692c4ed94SBarry Smith 91729bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91889280ab3SLois Curfman McInnes 91992c4ed94SBarry Smith /* malloc new storage space */ 9203f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 921b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 92292c4ed94SBarry Smith new_j = (int*)(new_a + bs2*new_nz); 92392c4ed94SBarry Smith new_i = new_j + new_nz; 92492c4ed94SBarry Smith 92592c4ed94SBarry Smith /* copy over old data into new slots */ 92692c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 92792c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 928549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 92992c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 930549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 931549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 932549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 933549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 93492c4ed94SBarry Smith /* free up old matrix storage */ 935606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 936606d414cSSatish Balay if (!a->singlemalloc) { 937606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 938606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 939606d414cSSatish Balay } 94092c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 941c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 94292c4ed94SBarry Smith 94392c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 94492c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 945b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 94692c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 94792c4ed94SBarry Smith a->reallocs++; 94892c4ed94SBarry Smith a->nz++; 94992c4ed94SBarry Smith } 95092c4ed94SBarry Smith N = nrow++ - 1; 95192c4ed94SBarry Smith /* shift up all the later entries in this row */ 95292c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 95392c4ed94SBarry Smith rp[ii+1] = rp[ii]; 954549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 95592c4ed94SBarry Smith } 956549d3d68SSatish Balay if (N >= i) { 957549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 958549d3d68SSatish Balay } 95992c4ed94SBarry Smith rp[i] = col; 9608a84c255SSatish Balay bap = ap + bs2*i; 9610e324ae4SSatish Balay if (roworiented) { 962dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 963dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 9640e324ae4SSatish Balay bap[jj] = *value++; 965dd9472c6SBarry Smith } 966dd9472c6SBarry Smith } 9670e324ae4SSatish Balay } else { 968dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 969dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9700e324ae4SSatish Balay *bap++ = *value++; 9710e324ae4SSatish Balay } 972dd9472c6SBarry Smith } 973dd9472c6SBarry Smith } 974f1241b54SBarry Smith noinsert2:; 97592c4ed94SBarry Smith low = i; 97692c4ed94SBarry Smith } 97792c4ed94SBarry Smith ailen[row] = nrow; 97892c4ed94SBarry Smith } 9793a40ed3dSBarry Smith PetscFunctionReturn(0); 98092c4ed94SBarry Smith } 98192c4ed94SBarry Smith 9824a2ae208SSatish Balay #undef __FUNCT__ 9834a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 9848f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 985584200bdSSatish Balay { 986584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 987584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 988273d9f13SBarry Smith int m = A->m,*ip,N,*ailen = a->ilen; 989549d3d68SSatish Balay int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 9903f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 991a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 992a56a16c8SHong Zhang PetscTruth flag; 993a56a16c8SHong Zhang #endif 994584200bdSSatish Balay 9953a40ed3dSBarry Smith PetscFunctionBegin; 9963a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 997584200bdSSatish Balay 99843ee02c3SBarry Smith if (m) rmax = ailen[0]; 999584200bdSSatish Balay for (i=1; i<mbs; i++) { 1000584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1001584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1002d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1003584200bdSSatish Balay if (fshift) { 1004a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1005584200bdSSatish Balay N = ailen[i]; 1006584200bdSSatish Balay for (j=0; j<N; j++) { 1007584200bdSSatish Balay ip[j-fshift] = ip[j]; 1008549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1009584200bdSSatish Balay } 1010584200bdSSatish Balay } 1011584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1012584200bdSSatish Balay } 1013584200bdSSatish Balay if (mbs) { 1014584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1015584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1016584200bdSSatish Balay } 1017584200bdSSatish Balay /* reset ilen and imax for each row */ 1018584200bdSSatish Balay for (i=0; i<mbs; i++) { 1019584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1020584200bdSSatish Balay } 1021a7c10996SSatish Balay a->nz = ai[mbs]; 1022584200bdSSatish Balay 1023584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1024584200bdSSatish Balay if (fshift && a->diag) { 1025606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 1026b424e231SHong Zhang PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1027584200bdSSatish Balay a->diag = 0; 1028584200bdSSatish Balay } 1029bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1030bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1031b0a32e0cSBarry Smith PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1032e2f3b5e9SSatish Balay a->reallocs = 0; 10330e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1034cf4441caSHong Zhang 1035a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 1036a56a16c8SHong Zhang ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1037a56a16c8SHong Zhang if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1038a56a16c8SHong Zhang #endif 1039a56a16c8SHong Zhang 10403a40ed3dSBarry Smith PetscFunctionReturn(0); 1041584200bdSSatish Balay } 1042584200bdSSatish Balay 10435a838052SSatish Balay 1044bea157c4SSatish Balay 1045bea157c4SSatish Balay /* 1046bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1047bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1048bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1049bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1050bea157c4SSatish Balay */ 10514a2ae208SSatish Balay #undef __FUNCT__ 10524a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1053bea157c4SSatish Balay static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1054d9b7c43dSSatish Balay { 1055bea157c4SSatish Balay int i,j,k,row; 1056bea157c4SSatish Balay PetscTruth flg; 10573a40ed3dSBarry Smith 1058433994e6SBarry Smith PetscFunctionBegin; 1059bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1060bea157c4SSatish Balay row = idx[i]; 1061bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1062bea157c4SSatish Balay sizes[j] = 1; 1063bea157c4SSatish Balay i++; 1064e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1065bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1066bea157c4SSatish Balay i++; 1067bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1068bea157c4SSatish Balay flg = PETSC_TRUE; 1069bea157c4SSatish Balay for (k=1; k<bs; k++) { 1070bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1071bea157c4SSatish Balay flg = PETSC_FALSE; 1072bea157c4SSatish Balay break; 1073d9b7c43dSSatish Balay } 1074bea157c4SSatish Balay } 1075bea157c4SSatish Balay if (flg == PETSC_TRUE) { /* No break in the bs */ 1076bea157c4SSatish Balay sizes[j] = bs; 1077bea157c4SSatish Balay i+= bs; 1078bea157c4SSatish Balay } else { 1079bea157c4SSatish Balay sizes[j] = 1; 1080bea157c4SSatish Balay i++; 1081bea157c4SSatish Balay } 1082bea157c4SSatish Balay } 1083bea157c4SSatish Balay } 1084bea157c4SSatish Balay *bs_max = j; 10853a40ed3dSBarry Smith PetscFunctionReturn(0); 1086d9b7c43dSSatish Balay } 1087d9b7c43dSSatish Balay 10884a2ae208SSatish Balay #undef __FUNCT__ 10894a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 109087828ca2SBarry Smith int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1091d9b7c43dSSatish Balay { 1092d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1093b6815fffSBarry Smith int ierr,i,j,k,count,is_n,*is_idx,*rows; 1094bea157c4SSatish Balay int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 109587828ca2SBarry Smith PetscScalar zero = 0.0; 10963f1db9ecSBarry Smith MatScalar *aa; 1097d9b7c43dSSatish Balay 10983a40ed3dSBarry Smith PetscFunctionBegin; 1099d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1100b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1101d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1102d9b7c43dSSatish Balay 1103bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1104b0a32e0cSBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1105bea157c4SSatish Balay sizes = rows + is_n; 1106bea157c4SSatish Balay 1107563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1108bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1109bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1110dffd3267SBarry Smith if (baij->keepzeroedrows) { 1111dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1112dffd3267SBarry Smith bs_max = is_n; 1113dffd3267SBarry Smith } else { 1114bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1115dffd3267SBarry Smith } 1116b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1117bea157c4SSatish Balay 1118bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1119bea157c4SSatish Balay row = rows[j]; 1120273d9f13SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1121bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1122bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1123dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1124bea157c4SSatish Balay if (diag) { 1125bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1126bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1127bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1128bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1129a07cd24cSSatish Balay } 1130563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1131bea157c4SSatish Balay for (k=0; k<bs; k++) { 1132bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1133bea157c4SSatish Balay } 1134bea157c4SSatish Balay } else { /* (!diag) */ 1135bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1136bea157c4SSatish Balay } /* end (!diag) */ 1137bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1138aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 113929bbc08cSBarry Smith if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1140bea157c4SSatish Balay #endif 1141bea157c4SSatish Balay for (k=0; k<count; k++) { 1142d9b7c43dSSatish Balay aa[0] = zero; 1143d9b7c43dSSatish Balay aa += bs; 1144d9b7c43dSSatish Balay } 1145d9b7c43dSSatish Balay if (diag) { 1146f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1147d9b7c43dSSatish Balay } 1148d9b7c43dSSatish Balay } 1149bea157c4SSatish Balay } 1150bea157c4SSatish Balay 1151606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 11529a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11533a40ed3dSBarry Smith PetscFunctionReturn(0); 1154d9b7c43dSSatish Balay } 11551c351548SSatish Balay 11564a2ae208SSatish Balay #undef __FUNCT__ 11574a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 115887828ca2SBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 11592d61bbb3SSatish Balay { 11602d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11612d61bbb3SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1162273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 11632d61bbb3SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1164549d3d68SSatish Balay int ridx,cidx,bs2=a->bs2,ierr; 1165273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 11663f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 11672d61bbb3SSatish Balay 11682d61bbb3SSatish Balay PetscFunctionBegin; 11692d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 11702d61bbb3SSatish Balay row = im[k]; brow = row/bs; 11715ef9f2a5SBarry Smith if (row < 0) continue; 1172aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1173273d9f13SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 11742d61bbb3SSatish Balay #endif 11752d61bbb3SSatish Balay rp = aj + ai[brow]; 11762d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 11772d61bbb3SSatish Balay rmax = imax[brow]; 11782d61bbb3SSatish Balay nrow = ailen[brow]; 11792d61bbb3SSatish Balay low = 0; 11802d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 11815ef9f2a5SBarry Smith if (in[l] < 0) continue; 1182aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1183273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 11842d61bbb3SSatish Balay #endif 11852d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 11862d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 11872d61bbb3SSatish Balay if (roworiented) { 11885ef9f2a5SBarry Smith value = v[l + k*n]; 11892d61bbb3SSatish Balay } else { 11902d61bbb3SSatish Balay value = v[k + l*m]; 11912d61bbb3SSatish Balay } 11922d61bbb3SSatish Balay if (!sorted) low = 0; high = nrow; 11932d61bbb3SSatish Balay while (high-low > 7) { 11942d61bbb3SSatish Balay t = (low+high)/2; 11952d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 11962d61bbb3SSatish Balay else low = t; 11972d61bbb3SSatish Balay } 11982d61bbb3SSatish Balay for (i=low; i<high; i++) { 11992d61bbb3SSatish Balay if (rp[i] > bcol) break; 12002d61bbb3SSatish Balay if (rp[i] == bcol) { 12012d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 12022d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 12032d61bbb3SSatish Balay else *bap = value; 12042d61bbb3SSatish Balay goto noinsert1; 12052d61bbb3SSatish Balay } 12062d61bbb3SSatish Balay } 12072d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 120829bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12092d61bbb3SSatish Balay if (nrow >= rmax) { 12102d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 12112d61bbb3SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 12123f1db9ecSBarry Smith MatScalar *new_a; 12132d61bbb3SSatish Balay 121429bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12152d61bbb3SSatish Balay 12162d61bbb3SSatish Balay /* Malloc new storage space */ 12173f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1218b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 12192d61bbb3SSatish Balay new_j = (int*)(new_a + bs2*new_nz); 12202d61bbb3SSatish Balay new_i = new_j + new_nz; 12212d61bbb3SSatish Balay 12222d61bbb3SSatish Balay /* copy over old data into new slots */ 12232d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 12242d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1225549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 12262d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1227549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1228549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1229549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1230549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 12312d61bbb3SSatish Balay /* free up old matrix storage */ 1232606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1233606d414cSSatish Balay if (!a->singlemalloc) { 1234606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1235606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1236606d414cSSatish Balay } 12372d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1238c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 12392d61bbb3SSatish Balay 12402d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 12412d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1242b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 12432d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 12442d61bbb3SSatish Balay a->reallocs++; 12452d61bbb3SSatish Balay a->nz++; 12462d61bbb3SSatish Balay } 12472d61bbb3SSatish Balay N = nrow++ - 1; 12482d61bbb3SSatish Balay /* shift up all the later entries in this row */ 12492d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 12502d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1251549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 12522d61bbb3SSatish Balay } 1253549d3d68SSatish Balay if (N>=i) { 1254549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1255549d3d68SSatish Balay } 12562d61bbb3SSatish Balay rp[i] = bcol; 12572d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 12582d61bbb3SSatish Balay noinsert1:; 12592d61bbb3SSatish Balay low = i; 12602d61bbb3SSatish Balay } 12612d61bbb3SSatish Balay ailen[brow] = nrow; 12622d61bbb3SSatish Balay } 12632d61bbb3SSatish Balay PetscFunctionReturn(0); 12642d61bbb3SSatish Balay } 12652d61bbb3SSatish Balay 12662d61bbb3SSatish Balay 12674a2ae208SSatish Balay #undef __FUNCT__ 12684a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 12695ef9f2a5SBarry Smith int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 12702d61bbb3SSatish Balay { 12712d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 12722d61bbb3SSatish Balay Mat outA; 12732d61bbb3SSatish Balay int ierr; 1274667159a5SBarry Smith PetscTruth row_identity,col_identity; 12752d61bbb3SSatish Balay 12762d61bbb3SSatish Balay PetscFunctionBegin; 127729bbc08cSBarry Smith if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1278667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1279667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1280667159a5SBarry Smith if (!row_identity || !col_identity) { 128129bbc08cSBarry Smith SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1282667159a5SBarry Smith } 12832d61bbb3SSatish Balay 12842d61bbb3SSatish Balay outA = inA; 12852d61bbb3SSatish Balay inA->factor = FACTOR_LU; 12862d61bbb3SSatish Balay 12872d61bbb3SSatish Balay if (!a->diag) { 1288c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 12892d61bbb3SSatish Balay } 1290cf242676SKris Buschelman 1291c38d4ed2SBarry Smith a->row = row; 1292c38d4ed2SBarry Smith a->col = col; 1293c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1294c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1295c38d4ed2SBarry Smith 1296c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 12974c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1298b0a32e0cSBarry Smith PetscLogObjectParent(inA,a->icol); 1299c38d4ed2SBarry Smith 1300cf242676SKris Buschelman /* 1301cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1302cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1303cf242676SKris Buschelman */ 1304cf242676SKris Buschelman if (a->bs < 8) { 1305cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1306cf242676SKris Buschelman } else { 1307c38d4ed2SBarry Smith if (!a->solve_work) { 130887828ca2SBarry Smith ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 130987828ca2SBarry Smith PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1310c38d4ed2SBarry Smith } 13112d61bbb3SSatish Balay } 13122d61bbb3SSatish Balay 1313667159a5SBarry Smith ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1314667159a5SBarry Smith 13152d61bbb3SSatish Balay PetscFunctionReturn(0); 13162d61bbb3SSatish Balay } 13174a2ae208SSatish Balay #undef __FUNCT__ 13184a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1319ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1320ba4ca20aSSatish Balay { 1321c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1322ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1323d132466eSBarry Smith int ierr; 1324ba4ca20aSSatish Balay 13253a40ed3dSBarry Smith PetscFunctionBegin; 1326c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1327d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1328d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 13293a40ed3dSBarry Smith PetscFunctionReturn(0); 1330ba4ca20aSSatish Balay } 1331d9b7c43dSSatish Balay 1332fb2e594dSBarry Smith EXTERN_C_BEGIN 13334a2ae208SSatish Balay #undef __FUNCT__ 13344a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 133527a8da17SBarry Smith int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 133627a8da17SBarry Smith { 133727a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 133814db4f74SSatish Balay int i,nz,nbs; 133927a8da17SBarry Smith 134027a8da17SBarry Smith PetscFunctionBegin; 134114db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 134214db4f74SSatish Balay nbs = baij->nbs; 134327a8da17SBarry Smith for (i=0; i<nz; i++) { 134427a8da17SBarry Smith baij->j[i] = indices[i]; 134527a8da17SBarry Smith } 134627a8da17SBarry Smith baij->nz = nz; 134714db4f74SSatish Balay for (i=0; i<nbs; i++) { 134827a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 134927a8da17SBarry Smith } 135027a8da17SBarry Smith 135127a8da17SBarry Smith PetscFunctionReturn(0); 135227a8da17SBarry Smith } 1353fb2e594dSBarry Smith EXTERN_C_END 135427a8da17SBarry Smith 13554a2ae208SSatish Balay #undef __FUNCT__ 13564a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 135727a8da17SBarry Smith /*@ 135827a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 135927a8da17SBarry Smith in the matrix. 136027a8da17SBarry Smith 136127a8da17SBarry Smith Input Parameters: 136227a8da17SBarry Smith + mat - the SeqBAIJ matrix 136327a8da17SBarry Smith - indices - the column indices 136427a8da17SBarry Smith 136515091d37SBarry Smith Level: advanced 136615091d37SBarry Smith 136727a8da17SBarry Smith Notes: 136827a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 136927a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 137027a8da17SBarry Smith of the MatSetValues() operation. 137127a8da17SBarry Smith 137227a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 137327a8da17SBarry Smith MatCreateSeqBAIJ(). 137427a8da17SBarry Smith 137527a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 137627a8da17SBarry Smith 137727a8da17SBarry Smith @*/ 137827a8da17SBarry Smith int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 137927a8da17SBarry Smith { 138027a8da17SBarry Smith int ierr,(*f)(Mat,int *); 138127a8da17SBarry Smith 138227a8da17SBarry Smith PetscFunctionBegin; 138327a8da17SBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE); 1384c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 138527a8da17SBarry Smith if (f) { 138627a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 138727a8da17SBarry Smith } else { 138829bbc08cSBarry Smith SETERRQ(1,"Wrong type of matrix to set column indices"); 138927a8da17SBarry Smith } 139027a8da17SBarry Smith PetscFunctionReturn(0); 139127a8da17SBarry Smith } 139227a8da17SBarry Smith 13934a2ae208SSatish Balay #undef __FUNCT__ 13944a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1395273d9f13SBarry Smith int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1396273d9f13SBarry Smith { 1397273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1398273d9f13SBarry Smith int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1399273d9f13SBarry Smith PetscReal atmp; 140087828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1401273d9f13SBarry Smith MatScalar *aa; 1402273d9f13SBarry Smith int ncols,brow,krow,kcol; 1403273d9f13SBarry Smith 1404273d9f13SBarry Smith PetscFunctionBegin; 1405273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1406273d9f13SBarry Smith bs = a->bs; 1407273d9f13SBarry Smith aa = a->a; 1408273d9f13SBarry Smith ai = a->i; 1409273d9f13SBarry Smith aj = a->j; 1410273d9f13SBarry Smith mbs = a->mbs; 1411273d9f13SBarry Smith 1412273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 1413273d9f13SBarry Smith ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1414273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1415273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1416273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1417273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1418273d9f13SBarry Smith brow = bs*i; 1419273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1420273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1421273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1422273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1423273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1424273d9f13SBarry Smith row = brow + krow; /* row index */ 1425273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1426273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1427273d9f13SBarry Smith } 1428273d9f13SBarry Smith } 1429273d9f13SBarry Smith aj++; 1430273d9f13SBarry Smith } 1431273d9f13SBarry Smith } 1432273d9f13SBarry Smith ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1433273d9f13SBarry Smith PetscFunctionReturn(0); 1434273d9f13SBarry Smith } 1435273d9f13SBarry Smith 14364a2ae208SSatish Balay #undef __FUNCT__ 14374a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1438273d9f13SBarry Smith int MatSetUpPreallocation_SeqBAIJ(Mat A) 1439273d9f13SBarry Smith { 1440273d9f13SBarry Smith int ierr; 1441273d9f13SBarry Smith 1442273d9f13SBarry Smith PetscFunctionBegin; 1443273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1444273d9f13SBarry Smith PetscFunctionReturn(0); 1445273d9f13SBarry Smith } 1446273d9f13SBarry Smith 14474a2ae208SSatish Balay #undef __FUNCT__ 14484a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 144987828ca2SBarry Smith int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1450f2a5309cSSatish Balay { 1451f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1452f2a5309cSSatish Balay PetscFunctionBegin; 1453f2a5309cSSatish Balay *array = a->a; 1454f2a5309cSSatish Balay PetscFunctionReturn(0); 1455f2a5309cSSatish Balay } 1456f2a5309cSSatish Balay 14574a2ae208SSatish Balay #undef __FUNCT__ 14584a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 145987828ca2SBarry Smith int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1460f2a5309cSSatish Balay { 1461f2a5309cSSatish Balay PetscFunctionBegin; 1462f2a5309cSSatish Balay PetscFunctionReturn(0); 1463f2a5309cSSatish Balay } 1464f2a5309cSSatish Balay 14652593348eSBarry Smith /* -------------------------------------------------------------------*/ 1466cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1467cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 1468cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 1469cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 1470cc2dc46cSBarry Smith MatMultAdd_SeqBAIJ_N, 14717c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 14727c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 1473cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 1474cc2dc46cSBarry Smith 0, 1475cc2dc46cSBarry Smith 0, 1476cc2dc46cSBarry Smith 0, 1477cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 1478cc2dc46cSBarry Smith 0, 1479b6490206SBarry Smith 0, 1480f2501298SSatish Balay MatTranspose_SeqBAIJ, 1481cc2dc46cSBarry Smith MatGetInfo_SeqBAIJ, 1482cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 1483cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 1484cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 1485cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 1486b6490206SBarry Smith 0, 1487cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 1488cc2dc46cSBarry Smith 0, 1489cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 1490cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 1491cc2dc46cSBarry Smith MatZeroRows_SeqBAIJ, 1492cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 1493cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 1494cc2dc46cSBarry Smith 0, 1495cc2dc46cSBarry Smith 0, 1496273d9f13SBarry Smith MatSetUpPreallocation_SeqBAIJ, 1497cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 1498cc2dc46cSBarry Smith 0, 1499f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 1500f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 15012e8a6d31SBarry Smith MatDuplicate_SeqBAIJ, 1502cc2dc46cSBarry Smith 0, 1503cc2dc46cSBarry Smith 0, 1504cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 1505cc2dc46cSBarry Smith 0, 1506cc2dc46cSBarry Smith 0, 1507cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 1508cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 1509cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 1510cc2dc46cSBarry Smith 0, 1511cc2dc46cSBarry Smith MatPrintHelp_SeqBAIJ, 1512cc2dc46cSBarry Smith MatScale_SeqBAIJ, 1513cc2dc46cSBarry Smith 0, 1514cc2dc46cSBarry Smith 0, 1515cc2dc46cSBarry Smith 0, 1516cc2dc46cSBarry Smith MatGetBlockSize_SeqBAIJ, 15173b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 151892c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 1519cc2dc46cSBarry Smith 0, 1520cc2dc46cSBarry Smith 0, 1521cc2dc46cSBarry Smith 0, 1522cc2dc46cSBarry Smith 0, 1523cc2dc46cSBarry Smith 0, 1524cc2dc46cSBarry Smith 0, 1525d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 1526cc2dc46cSBarry Smith MatGetSubMatrix_SeqBAIJ, 1527b9b97703SBarry Smith MatDestroy_SeqBAIJ, 1528b9b97703SBarry Smith MatView_SeqBAIJ, 15293a64cc32SBarry Smith MatGetPetscMaps_Petsc, 1530273d9f13SBarry Smith 0, 1531273d9f13SBarry Smith 0, 1532273d9f13SBarry Smith 0, 1533273d9f13SBarry Smith 0, 1534273d9f13SBarry Smith 0, 1535273d9f13SBarry Smith 0, 1536273d9f13SBarry Smith MatGetRowMax_SeqBAIJ, 1537273d9f13SBarry Smith MatConvert_Basic}; 15382593348eSBarry Smith 15393e90b805SBarry Smith EXTERN_C_BEGIN 15404a2ae208SSatish Balay #undef __FUNCT__ 15414a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 15423e90b805SBarry Smith int MatStoreValues_SeqBAIJ(Mat mat) 15433e90b805SBarry Smith { 15443e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1545273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1546d132466eSBarry Smith int ierr; 15473e90b805SBarry Smith 15483e90b805SBarry Smith PetscFunctionBegin; 15493e90b805SBarry Smith if (aij->nonew != 1) { 155029bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15513e90b805SBarry Smith } 15523e90b805SBarry Smith 15533e90b805SBarry Smith /* allocate space for values if not already there */ 15543e90b805SBarry Smith if (!aij->saved_values) { 155587828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 15563e90b805SBarry Smith } 15573e90b805SBarry Smith 15583e90b805SBarry Smith /* copy values over */ 155987828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15603e90b805SBarry Smith PetscFunctionReturn(0); 15613e90b805SBarry Smith } 15623e90b805SBarry Smith EXTERN_C_END 15633e90b805SBarry Smith 15643e90b805SBarry Smith EXTERN_C_BEGIN 15654a2ae208SSatish Balay #undef __FUNCT__ 15664a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 156732e87ba3SBarry Smith int MatRetrieveValues_SeqBAIJ(Mat mat) 15683e90b805SBarry Smith { 15693e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1570273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 15713e90b805SBarry Smith 15723e90b805SBarry Smith PetscFunctionBegin; 15733e90b805SBarry Smith if (aij->nonew != 1) { 157429bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15753e90b805SBarry Smith } 15763e90b805SBarry Smith if (!aij->saved_values) { 157729bbc08cSBarry Smith SETERRQ(1,"Must call MatStoreValues(A);first"); 15783e90b805SBarry Smith } 15793e90b805SBarry Smith 15803e90b805SBarry Smith /* copy values over */ 158187828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15823e90b805SBarry Smith PetscFunctionReturn(0); 15833e90b805SBarry Smith } 15843e90b805SBarry Smith EXTERN_C_END 15853e90b805SBarry Smith 1586273d9f13SBarry Smith EXTERN_C_BEGIN 1587273d9f13SBarry Smith extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1588273d9f13SBarry Smith EXTERN_C_END 1589273d9f13SBarry Smith 1590273d9f13SBarry Smith EXTERN_C_BEGIN 15914a2ae208SSatish Balay #undef __FUNCT__ 15924a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 1593273d9f13SBarry Smith int MatCreate_SeqBAIJ(Mat B) 15942593348eSBarry Smith { 1595273d9f13SBarry Smith int ierr,size; 1596b6490206SBarry Smith Mat_SeqBAIJ *b; 15973b2fbd54SBarry Smith 15983a40ed3dSBarry Smith PetscFunctionBegin; 1599273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 160029bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1601b6490206SBarry Smith 1602273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 1603273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 1604b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1605b0a32e0cSBarry Smith B->data = (void*)b; 1606549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1607549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 16082593348eSBarry Smith B->factor = 0; 16092593348eSBarry Smith B->lupivotthreshold = 1.0; 161090f02eecSBarry Smith B->mapping = 0; 16112593348eSBarry Smith b->row = 0; 16122593348eSBarry Smith b->col = 0; 1613e51c0b9cSSatish Balay b->icol = 0; 16142593348eSBarry Smith b->reallocs = 0; 16153e90b805SBarry Smith b->saved_values = 0; 1616cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1617563b5814SBarry Smith b->setvalueslen = 0; 1618563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 1619563b5814SBarry Smith #endif 16208661488fSKris Buschelman b->single_precision_solves = PETSC_FALSE; 16212593348eSBarry Smith 16223a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 16233a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1624a5ae1ecdSBarry Smith 1625c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 1626c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 16272593348eSBarry Smith b->nonew = 0; 16282593348eSBarry Smith b->diag = 0; 16292593348eSBarry Smith b->solve_work = 0; 1630de6a44a3SBarry Smith b->mult_work = 0; 16312a1b7f2aSHong Zhang B->spptr = 0; 16320e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 1633883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 16344e220ebcSLois Curfman McInnes 1635f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 16363e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 1637bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1638f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 16393e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 1640bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1641f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 164227a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1643bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1644273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1645273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 1646273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 16473a40ed3dSBarry Smith PetscFunctionReturn(0); 16482593348eSBarry Smith } 1649273d9f13SBarry Smith EXTERN_C_END 16502593348eSBarry Smith 16514a2ae208SSatish Balay #undef __FUNCT__ 16524a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 16532e8a6d31SBarry Smith int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 16542593348eSBarry Smith { 16552593348eSBarry Smith Mat C; 1656b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 165727a8da17SBarry Smith int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1658de6a44a3SBarry Smith 16593a40ed3dSBarry Smith PetscFunctionBegin; 166029bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 16612593348eSBarry Smith 16622593348eSBarry Smith *B = 0; 1663273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1664273d9f13SBarry Smith ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1665273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 166644cd7ae7SLois Curfman McInnes 1667b6490206SBarry Smith c->bs = a->bs; 16689df24120SSatish Balay c->bs2 = a->bs2; 1669b6490206SBarry Smith c->mbs = a->mbs; 1670b6490206SBarry Smith c->nbs = a->nbs; 167135d8aa7fSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 16722593348eSBarry Smith 1673b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1674b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1675b6490206SBarry Smith for (i=0; i<mbs; i++) { 16762593348eSBarry Smith c->imax[i] = a->imax[i]; 16772593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16782593348eSBarry Smith } 16792593348eSBarry Smith 16802593348eSBarry Smith /* allocate the matrix space */ 1681c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 16823f1db9ecSBarry Smith len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1683b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 16847e67e3f9SSatish Balay c->j = (int*)(c->a + nz*bs2); 1685de6a44a3SBarry Smith c->i = c->j + nz; 1686549d3d68SSatish Balay ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1687b6490206SBarry Smith if (mbs > 0) { 1688549d3d68SSatish Balay ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 16892e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 1690549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16912e8a6d31SBarry Smith } else { 1692549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16932593348eSBarry Smith } 16942593348eSBarry Smith } 16952593348eSBarry Smith 1696b0a32e0cSBarry Smith PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 16972593348eSBarry Smith c->sorted = a->sorted; 16982593348eSBarry Smith c->roworiented = a->roworiented; 16992593348eSBarry Smith c->nonew = a->nonew; 17002593348eSBarry Smith 17012593348eSBarry Smith if (a->diag) { 1702b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1703b0a32e0cSBarry Smith PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1704b6490206SBarry Smith for (i=0; i<mbs; i++) { 17052593348eSBarry Smith c->diag[i] = a->diag[i]; 17062593348eSBarry Smith } 170798305bb5SBarry Smith } else c->diag = 0; 17082593348eSBarry Smith c->nz = a->nz; 17092593348eSBarry Smith c->maxnz = a->maxnz; 17102593348eSBarry Smith c->solve_work = 0; 17112a1b7f2aSHong Zhang C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17127fc0212eSBarry Smith c->mult_work = 0; 1713273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 1714273d9f13SBarry Smith C->assembled = PETSC_TRUE; 17152593348eSBarry Smith *B = C; 1716b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 17173a40ed3dSBarry Smith PetscFunctionReturn(0); 17182593348eSBarry Smith } 17192593348eSBarry Smith 1720273d9f13SBarry Smith EXTERN_C_BEGIN 17214a2ae208SSatish Balay #undef __FUNCT__ 17224a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 1723b0a32e0cSBarry Smith int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 17242593348eSBarry Smith { 1725b6490206SBarry Smith Mat_SeqBAIJ *a; 17262593348eSBarry Smith Mat B; 1727f1af5d2fSBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1728b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 172935aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1730a7c10996SSatish Balay int *masked,nmask,tmp,bs2,ishift; 173187828ca2SBarry Smith PetscScalar *aa; 173219bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 17332593348eSBarry Smith 17343a40ed3dSBarry Smith PetscFunctionBegin; 1735b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1736de6a44a3SBarry Smith bs2 = bs*bs; 1737b6490206SBarry Smith 1738d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 173929bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1740b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 17410752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1742552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 17432593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17442593348eSBarry Smith 1745d64ed03dSBarry Smith if (header[3] < 0) { 174629bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1747d64ed03dSBarry Smith } 1748d64ed03dSBarry Smith 174929bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 175035aab85fSBarry Smith 175135aab85fSBarry Smith /* 175235aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 175335aab85fSBarry Smith divisible by the blocksize 175435aab85fSBarry Smith */ 1755b6490206SBarry Smith mbs = M/bs; 175635aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 175735aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 175835aab85fSBarry Smith else mbs++; 175935aab85fSBarry Smith if (extra_rows) { 1760b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 176135aab85fSBarry Smith } 1762b6490206SBarry Smith 17632593348eSBarry Smith /* read in row lengths */ 1764b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 17650752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 176635aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 17672593348eSBarry Smith 1768b6490206SBarry Smith /* read in column indices */ 1769b0a32e0cSBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 17700752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 177135aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1772b6490206SBarry Smith 1773b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1774b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1775549d3d68SSatish Balay ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1776b0a32e0cSBarry Smith ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1777549d3d68SSatish Balay ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 177835aab85fSBarry Smith masked = mask + mbs; 1779b6490206SBarry Smith rowcount = 0; nzcount = 0; 1780b6490206SBarry Smith for (i=0; i<mbs; i++) { 178135aab85fSBarry Smith nmask = 0; 1782b6490206SBarry Smith for (j=0; j<bs; j++) { 1783b6490206SBarry Smith kmax = rowlengths[rowcount]; 1784b6490206SBarry Smith for (k=0; k<kmax; k++) { 178535aab85fSBarry Smith tmp = jj[nzcount++]/bs; 178635aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1787b6490206SBarry Smith } 1788b6490206SBarry Smith rowcount++; 1789b6490206SBarry Smith } 179035aab85fSBarry Smith browlengths[i] += nmask; 179135aab85fSBarry Smith /* zero out the mask elements we set */ 179235aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1793b6490206SBarry Smith } 1794b6490206SBarry Smith 17952593348eSBarry Smith /* create our matrix */ 17963eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 17972593348eSBarry Smith B = *A; 1798b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 17992593348eSBarry Smith 18002593348eSBarry Smith /* set matrix "i" values */ 1801de6a44a3SBarry Smith a->i[0] = 0; 1802b6490206SBarry Smith for (i=1; i<= mbs; i++) { 1803b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1804b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 18052593348eSBarry Smith } 1806b6490206SBarry Smith a->nz = 0; 1807b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 18082593348eSBarry Smith 1809b6490206SBarry Smith /* read in nonzero values */ 181087828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 18110752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 181235aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1813b6490206SBarry Smith 1814b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1815b6490206SBarry Smith nzcount = 0; jcount = 0; 1816b6490206SBarry Smith for (i=0; i<mbs; i++) { 1817b6490206SBarry Smith nzcountb = nzcount; 181835aab85fSBarry Smith nmask = 0; 1819b6490206SBarry Smith for (j=0; j<bs; j++) { 1820b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1821b6490206SBarry Smith for (k=0; k<kmax; k++) { 182235aab85fSBarry Smith tmp = jj[nzcount++]/bs; 182335aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1824b6490206SBarry Smith } 1825b6490206SBarry Smith } 1826de6a44a3SBarry Smith /* sort the masked values */ 1827433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1828de6a44a3SBarry Smith 1829b6490206SBarry Smith /* set "j" values into matrix */ 1830b6490206SBarry Smith maskcount = 1; 183135aab85fSBarry Smith for (j=0; j<nmask; j++) { 183235aab85fSBarry Smith a->j[jcount++] = masked[j]; 1833de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1834b6490206SBarry Smith } 1835b6490206SBarry Smith /* set "a" values into matrix */ 1836de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1837b6490206SBarry Smith for (j=0; j<bs; j++) { 1838b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1839b6490206SBarry Smith for (k=0; k<kmax; k++) { 1840de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1841de6a44a3SBarry Smith block = mask[tmp] - 1; 1842de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1843de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1844375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 1845b6490206SBarry Smith } 1846b6490206SBarry Smith } 184735aab85fSBarry Smith /* zero out the mask elements we set */ 184835aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1849b6490206SBarry Smith } 185029bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1851b6490206SBarry Smith 1852606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1853606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 1854606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1855606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 1856606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 1857b6490206SBarry Smith 1858b6490206SBarry Smith B->assembled = PETSC_TRUE; 1859de6a44a3SBarry Smith 18609c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 18613a40ed3dSBarry Smith PetscFunctionReturn(0); 18622593348eSBarry Smith } 1863273d9f13SBarry Smith EXTERN_C_END 18642593348eSBarry Smith 18654a2ae208SSatish Balay #undef __FUNCT__ 18664a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 1867273d9f13SBarry Smith /*@C 1868273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1869273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 1870273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1871273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1872273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 18732593348eSBarry Smith 1874273d9f13SBarry Smith Collective on MPI_Comm 1875273d9f13SBarry Smith 1876273d9f13SBarry Smith Input Parameters: 1877273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1878273d9f13SBarry Smith . bs - size of block 1879273d9f13SBarry Smith . m - number of rows 1880273d9f13SBarry Smith . n - number of columns 188135d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 188235d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 1883273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1884273d9f13SBarry Smith 1885273d9f13SBarry Smith Output Parameter: 1886273d9f13SBarry Smith . A - the matrix 1887273d9f13SBarry Smith 1888273d9f13SBarry Smith Options Database Keys: 1889273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1890273d9f13SBarry Smith block calculations (much slower) 1891273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1892273d9f13SBarry Smith 1893273d9f13SBarry Smith Level: intermediate 1894273d9f13SBarry Smith 1895273d9f13SBarry Smith Notes: 189635d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 189735d8aa7fSBarry Smith 1898273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1899273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1900273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1901273d9f13SBarry Smith 1902273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1903273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1904273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1905273d9f13SBarry Smith matrices. 1906273d9f13SBarry Smith 1907273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1908273d9f13SBarry Smith @*/ 1909273d9f13SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1910273d9f13SBarry Smith { 1911273d9f13SBarry Smith int ierr; 1912273d9f13SBarry Smith 1913273d9f13SBarry Smith PetscFunctionBegin; 1914273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1915273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1916273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1917273d9f13SBarry Smith PetscFunctionReturn(0); 1918273d9f13SBarry Smith } 1919273d9f13SBarry Smith 19204a2ae208SSatish Balay #undef __FUNCT__ 19214a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1922273d9f13SBarry Smith /*@C 1923273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1924273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 1925273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1926273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1927273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 1928273d9f13SBarry Smith 1929273d9f13SBarry Smith Collective on MPI_Comm 1930273d9f13SBarry Smith 1931273d9f13SBarry Smith Input Parameters: 1932273d9f13SBarry Smith + A - the matrix 1933273d9f13SBarry Smith . bs - size of block 1934273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1935273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 1936273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1937273d9f13SBarry Smith 1938273d9f13SBarry Smith Options Database Keys: 1939273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1940273d9f13SBarry Smith block calculations (much slower) 1941273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1942273d9f13SBarry Smith 1943273d9f13SBarry Smith Level: intermediate 1944273d9f13SBarry Smith 1945273d9f13SBarry Smith Notes: 1946273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1947273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1948273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1949273d9f13SBarry Smith 1950273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1951273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1952273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1953273d9f13SBarry Smith matrices. 1954273d9f13SBarry Smith 1955273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1956273d9f13SBarry Smith @*/ 1957273d9f13SBarry Smith int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1958273d9f13SBarry Smith { 1959273d9f13SBarry Smith Mat_SeqBAIJ *b; 1960273d9f13SBarry Smith int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1961273d9f13SBarry Smith PetscTruth flg; 1962273d9f13SBarry Smith 1963273d9f13SBarry Smith PetscFunctionBegin; 1964273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1965273d9f13SBarry Smith if (!flg) PetscFunctionReturn(0); 1966273d9f13SBarry Smith 1967273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 1968b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1969273d9f13SBarry Smith if (nnz && newbs != bs) { 1970273d9f13SBarry Smith SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1971273d9f13SBarry Smith } 1972273d9f13SBarry Smith bs = newbs; 1973273d9f13SBarry Smith 1974273d9f13SBarry Smith mbs = B->m/bs; 1975273d9f13SBarry Smith nbs = B->n/bs; 1976273d9f13SBarry Smith bs2 = bs*bs; 1977273d9f13SBarry Smith 1978273d9f13SBarry Smith if (mbs*bs!=B->m || nbs*bs!=B->n) { 197935d8aa7fSBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1980273d9f13SBarry Smith } 1981273d9f13SBarry Smith 1982435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1983435da068SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1984273d9f13SBarry Smith if (nnz) { 1985273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1986273d9f13SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 19873a7fca6bSBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1988273d9f13SBarry Smith } 1989273d9f13SBarry Smith } 1990273d9f13SBarry Smith 1991273d9f13SBarry Smith b = (Mat_SeqBAIJ*)B->data; 1992b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 199354138f6bSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 199454138f6bSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1995273d9f13SBarry Smith if (!flg) { 1996273d9f13SBarry Smith switch (bs) { 1997273d9f13SBarry Smith case 1: 1998273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1999273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_1; 2000273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2001273d9f13SBarry Smith break; 2002273d9f13SBarry Smith case 2: 2003273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2004273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_2; 2005273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2006273d9f13SBarry Smith break; 2007273d9f13SBarry Smith case 3: 2008273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2009273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_3; 2010273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2011273d9f13SBarry Smith break; 2012273d9f13SBarry Smith case 4: 2013273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2014273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_4; 2015273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2016273d9f13SBarry Smith break; 2017273d9f13SBarry Smith case 5: 2018273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2019273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_5; 2020273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2021273d9f13SBarry Smith break; 2022273d9f13SBarry Smith case 6: 2023273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2024273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_6; 2025273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2026273d9f13SBarry Smith break; 2027273d9f13SBarry Smith case 7: 202854138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2029273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_7; 2030273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2031273d9f13SBarry Smith break; 2032273d9f13SBarry Smith default: 203354138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2034273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2035273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2036273d9f13SBarry Smith break; 2037273d9f13SBarry Smith } 2038273d9f13SBarry Smith } 2039273d9f13SBarry Smith b->bs = bs; 2040273d9f13SBarry Smith b->mbs = mbs; 2041273d9f13SBarry Smith b->nbs = nbs; 2042b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2043273d9f13SBarry Smith if (!nnz) { 2044435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2045273d9f13SBarry Smith else if (nz <= 0) nz = 1; 2046273d9f13SBarry Smith for (i=0; i<mbs; i++) b->imax[i] = nz; 2047273d9f13SBarry Smith nz = nz*mbs; 2048273d9f13SBarry Smith } else { 2049273d9f13SBarry Smith nz = 0; 2050273d9f13SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2051273d9f13SBarry Smith } 2052273d9f13SBarry Smith 2053273d9f13SBarry Smith /* allocate the matrix space */ 2054273d9f13SBarry Smith len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2055b0a32e0cSBarry Smith ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2056273d9f13SBarry Smith ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2057273d9f13SBarry Smith b->j = (int*)(b->a + nz*bs2); 2058273d9f13SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2059273d9f13SBarry Smith b->i = b->j + nz; 2060273d9f13SBarry Smith b->singlemalloc = PETSC_TRUE; 2061273d9f13SBarry Smith 2062273d9f13SBarry Smith b->i[0] = 0; 2063273d9f13SBarry Smith for (i=1; i<mbs+1; i++) { 2064273d9f13SBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 2065273d9f13SBarry Smith } 2066273d9f13SBarry Smith 2067273d9f13SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 206816d6aa21SSatish Balay ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2069b0a32e0cSBarry Smith PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2070273d9f13SBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2071273d9f13SBarry Smith 2072273d9f13SBarry Smith b->bs = bs; 2073273d9f13SBarry Smith b->bs2 = bs2; 2074273d9f13SBarry Smith b->mbs = mbs; 2075273d9f13SBarry Smith b->nz = 0; 2076273d9f13SBarry Smith b->maxnz = nz*bs2; 2077273d9f13SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2078273d9f13SBarry Smith PetscFunctionReturn(0); 2079273d9f13SBarry Smith } 20802593348eSBarry Smith 2081