1bba1ac68SSatish Balay /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 22593348eSBarry Smith 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 91a6a6d98SBarry Smith #include "src/inline/spops.h" 10325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 113b547af2SSatish Balay 12af674e45SBarry Smith /* 13af674e45SBarry Smith Special version for Fun3d sequential benchmark 14af674e45SBarry Smith */ 15af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 16af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 19af674e45SBarry Smith #endif 20af674e45SBarry Smith 21af674e45SBarry Smith #undef __FUNCT__ 22af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 23af674e45SBarry Smith void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,MatScalar *v,InsertMode *is,int *err) 24af674e45SBarry Smith { 25af674e45SBarry Smith Mat A = *AA; 26af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27*a037b02bSBarry Smith int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 28af674e45SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 29*a037b02bSBarry Smith int *aj=a->j,stepval; 30af674e45SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 31af674e45SBarry Smith 32af674e45SBarry Smith PetscFunctionBegin; 33af674e45SBarry Smith stepval = (n-1)*4; 34af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 35af674e45SBarry Smith row = im[k]; 36af674e45SBarry Smith rp = aj + ai[row]; 37af674e45SBarry Smith ap = aa + 16*ai[row]; 38af674e45SBarry Smith nrow = ailen[row]; 39af674e45SBarry Smith low = 0; 40af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 41af674e45SBarry Smith col = in[l]; 42af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 43af674e45SBarry Smith low = 0; high = nrow; 44af674e45SBarry Smith while (high-low > 7) { 45af674e45SBarry Smith t = (low+high)/2; 46af674e45SBarry Smith if (rp[t] > col) high = t; 47af674e45SBarry Smith else low = t; 48af674e45SBarry Smith } 49af674e45SBarry Smith for (i=low; i<high; i++) { 50af674e45SBarry Smith if (rp[i] > col) break; 51af674e45SBarry Smith if (rp[i] == col) { 52af674e45SBarry Smith bap = ap + 16*i; 53af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 54af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 55af674e45SBarry Smith bap[jj] += *value++; 56af674e45SBarry Smith } 57af674e45SBarry Smith } 58af674e45SBarry Smith goto noinsert2; 59af674e45SBarry Smith } 60af674e45SBarry Smith } 61af674e45SBarry Smith N = nrow++ - 1; 62af674e45SBarry Smith /* shift up all the later entries in this row */ 63af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 64af674e45SBarry Smith rp[ii+1] = rp[ii]; 65*a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 66af674e45SBarry Smith } 67af674e45SBarry Smith if (N >= i) { 68*a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 69af674e45SBarry Smith } 70af674e45SBarry Smith rp[i] = col; 71af674e45SBarry Smith bap = ap + 16*i; 72af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 73af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 74af674e45SBarry Smith bap[jj] = *value++; 75af674e45SBarry Smith } 76af674e45SBarry Smith } 77af674e45SBarry Smith noinsert2:; 78af674e45SBarry Smith low = i; 79af674e45SBarry Smith } 80af674e45SBarry Smith ailen[row] = nrow; 81af674e45SBarry Smith } 82af674e45SBarry Smith } 83af674e45SBarry Smith 84af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 85af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 86af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 87af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 88af674e45SBarry Smith #endif 89af674e45SBarry Smith 90af674e45SBarry Smith #undef __FUNCT__ 91af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 92*a037b02bSBarry Smith void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v,InsertMode *is,int *err) 93af674e45SBarry Smith { 94af674e45SBarry Smith Mat A = *AA; 95af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 96*a037b02bSBarry Smith int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 97af674e45SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 98af674e45SBarry Smith int *aj=a->j,brow,bcol; 99af674e45SBarry Smith int ridx,cidx,ierr; 100af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 101af674e45SBarry Smith 102af674e45SBarry Smith PetscFunctionBegin; 103af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 104af674e45SBarry Smith row = im[k]; brow = row/4; 105af674e45SBarry Smith rp = aj + ai[brow]; 106af674e45SBarry Smith ap = aa + 16*ai[brow]; 107af674e45SBarry Smith nrow = ailen[brow]; 108af674e45SBarry Smith low = 0; 109af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 110af674e45SBarry Smith col = in[l]; bcol = col/4; 111af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 112af674e45SBarry Smith value = v[l + k*n]; 113af674e45SBarry Smith low = 0; high = nrow; 114af674e45SBarry Smith while (high-low > 7) { 115af674e45SBarry Smith t = (low+high)/2; 116af674e45SBarry Smith if (rp[t] > bcol) high = t; 117af674e45SBarry Smith else low = t; 118af674e45SBarry Smith } 119af674e45SBarry Smith for (i=low; i<high; i++) { 120af674e45SBarry Smith if (rp[i] > bcol) break; 121af674e45SBarry Smith if (rp[i] == bcol) { 122af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 123af674e45SBarry Smith *bap += value; 124af674e45SBarry Smith goto noinsert1; 125af674e45SBarry Smith } 126af674e45SBarry Smith } 127af674e45SBarry Smith N = nrow++ - 1; 128af674e45SBarry Smith /* shift up all the later entries in this row */ 129af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 130af674e45SBarry Smith rp[ii+1] = rp[ii]; 131*a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 132af674e45SBarry Smith } 133af674e45SBarry Smith if (N>=i) { 134*a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 135af674e45SBarry Smith } 136af674e45SBarry Smith rp[i] = bcol; 137af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 138af674e45SBarry Smith noinsert1:; 139af674e45SBarry Smith low = i; 140af674e45SBarry Smith } 141af674e45SBarry Smith ailen[brow] = nrow; 142af674e45SBarry Smith } 143af674e45SBarry Smith } 144af674e45SBarry Smith 14595d5f7c2SBarry Smith /* UGLY, ugly, ugly 14687828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1473477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 14895d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 14995d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 15095d5f7c2SBarry Smith into the single precision data structures. 15195d5f7c2SBarry Smith */ 15295d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 153ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 15495d5f7c2SBarry Smith #else 15595d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 15695d5f7c2SBarry Smith #endif 15704929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 15804929863SHong Zhang EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 15904929863SHong Zhang #endif 16095d5f7c2SBarry Smith 1612d61bbb3SSatish Balay #define CHUNKSIZE 10 162de6a44a3SBarry Smith 163be5855fcSBarry Smith /* 164be5855fcSBarry Smith Checks for missing diagonals 165be5855fcSBarry Smith */ 1664a2ae208SSatish Balay #undef __FUNCT__ 1674a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 168c4992f7dSBarry Smith int MatMissingDiagonal_SeqBAIJ(Mat A) 169be5855fcSBarry Smith { 170be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 171883fce79SBarry Smith int *diag,*jj = a->j,i,ierr; 172be5855fcSBarry Smith 173be5855fcSBarry Smith PetscFunctionBegin; 174c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 175883fce79SBarry Smith diag = a->diag; 1760e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 177be5855fcSBarry Smith if (jj[diag[i]] != i) { 17829bbc08cSBarry Smith SETERRQ1(1,"Matrix is missing diagonal number %d",i); 179be5855fcSBarry Smith } 180be5855fcSBarry Smith } 181be5855fcSBarry Smith PetscFunctionReturn(0); 182be5855fcSBarry Smith } 183be5855fcSBarry Smith 1844a2ae208SSatish Balay #undef __FUNCT__ 1854a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 186c4992f7dSBarry Smith int MatMarkDiagonal_SeqBAIJ(Mat A) 187de6a44a3SBarry Smith { 188de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 18982502324SSatish Balay int i,j,*diag,m = a->mbs,ierr; 190de6a44a3SBarry Smith 1913a40ed3dSBarry Smith PetscFunctionBegin; 192883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 193883fce79SBarry Smith 194b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 195b0a32e0cSBarry Smith PetscLogObjectMemory(A,(m+1)*sizeof(int)); 1967fc0212eSBarry Smith for (i=0; i<m; i++) { 197ecc615b2SBarry Smith diag[i] = a->i[i+1]; 198de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 199de6a44a3SBarry Smith if (a->j[j] == i) { 200de6a44a3SBarry Smith diag[i] = j; 201de6a44a3SBarry Smith break; 202de6a44a3SBarry Smith } 203de6a44a3SBarry Smith } 204de6a44a3SBarry Smith } 205de6a44a3SBarry Smith a->diag = diag; 2063a40ed3dSBarry Smith PetscFunctionReturn(0); 207de6a44a3SBarry Smith } 2082593348eSBarry Smith 2092593348eSBarry Smith 210ca44d042SBarry Smith EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 2113b2fbd54SBarry Smith 2124a2ae208SSatish Balay #undef __FUNCT__ 2134a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 2140e6d2581SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2153b2fbd54SBarry Smith { 2163b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2173b2fbd54SBarry Smith int ierr,n = a->mbs,i; 2183b2fbd54SBarry Smith 2193a40ed3dSBarry Smith PetscFunctionBegin; 2203b2fbd54SBarry Smith *nn = n; 2213a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2223b2fbd54SBarry Smith if (symmetric) { 2233b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 2243b2fbd54SBarry Smith } else if (oshift == 1) { 2253b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 226435da068SBarry Smith int nz = a->i[n]; 2273b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 2283b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 2293b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2303b2fbd54SBarry Smith } else { 2313b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2323b2fbd54SBarry Smith } 2333b2fbd54SBarry Smith 2343a40ed3dSBarry Smith PetscFunctionReturn(0); 2353b2fbd54SBarry Smith } 2363b2fbd54SBarry Smith 2374a2ae208SSatish Balay #undef __FUNCT__ 2384a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 239435da068SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2403b2fbd54SBarry Smith { 2413b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 242606d414cSSatish Balay int i,n = a->mbs,ierr; 2433b2fbd54SBarry Smith 2443a40ed3dSBarry Smith PetscFunctionBegin; 2453a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2463b2fbd54SBarry Smith if (symmetric) { 247606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 248606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 249af5da2bfSBarry Smith } else if (oshift == 1) { 250435da068SBarry Smith int nz = a->i[n]-1; 2513b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 2523b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 2533b2fbd54SBarry Smith } 2543a40ed3dSBarry Smith PetscFunctionReturn(0); 2553b2fbd54SBarry Smith } 2563b2fbd54SBarry Smith 2574a2ae208SSatish Balay #undef __FUNCT__ 2584a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 2592d61bbb3SSatish Balay int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 2602d61bbb3SSatish Balay { 2612d61bbb3SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2622d61bbb3SSatish Balay 2632d61bbb3SSatish Balay PetscFunctionBegin; 2642d61bbb3SSatish Balay *bs = baij->bs; 2652d61bbb3SSatish Balay PetscFunctionReturn(0); 2662d61bbb3SSatish Balay } 2672d61bbb3SSatish Balay 2682d61bbb3SSatish Balay 2694a2ae208SSatish Balay #undef __FUNCT__ 2704a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 271e1311b90SBarry Smith int MatDestroy_SeqBAIJ(Mat A) 2722d61bbb3SSatish Balay { 2732d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 274e51c0b9cSSatish Balay int ierr; 2752d61bbb3SSatish Balay 276433994e6SBarry Smith PetscFunctionBegin; 277aa482453SBarry Smith #if defined(PETSC_USE_LOG) 278b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 2792d61bbb3SSatish Balay #endif 280606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 281606d414cSSatish Balay if (!a->singlemalloc) { 282606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 283606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 284606d414cSSatish Balay } 285c38d4ed2SBarry Smith if (a->row) { 286c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 287c38d4ed2SBarry Smith } 288c38d4ed2SBarry Smith if (a->col) { 289c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 290c38d4ed2SBarry Smith } 291606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 292606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 293606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 294606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 295606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 296e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 297606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 298563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 299563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 300563b5814SBarry Smith #endif 301606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 3022d61bbb3SSatish Balay PetscFunctionReturn(0); 3032d61bbb3SSatish Balay } 3042d61bbb3SSatish Balay 3054a2ae208SSatish Balay #undef __FUNCT__ 3064a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 3072d61bbb3SSatish Balay int MatSetOption_SeqBAIJ(Mat A,MatOption op) 3082d61bbb3SSatish Balay { 3092d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3102d61bbb3SSatish Balay 3112d61bbb3SSatish Balay PetscFunctionBegin; 312aa275fccSKris Buschelman switch (op) { 313aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 314aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 315aa275fccSKris Buschelman break; 316aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 317aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 318aa275fccSKris Buschelman break; 319aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 320aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 321aa275fccSKris Buschelman break; 322aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 323aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 324aa275fccSKris Buschelman break; 325aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 326aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 327aa275fccSKris Buschelman break; 328aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 329aa275fccSKris Buschelman a->nonew = 1; 330aa275fccSKris Buschelman break; 331aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 332aa275fccSKris Buschelman a->nonew = -1; 333aa275fccSKris Buschelman break; 334aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 335aa275fccSKris Buschelman a->nonew = -2; 336aa275fccSKris Buschelman break; 337aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 338aa275fccSKris Buschelman a->nonew = 0; 339aa275fccSKris Buschelman break; 340aa275fccSKris Buschelman case MAT_ROWS_SORTED: 341aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 342aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 343aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 344aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 345b0a32e0cSBarry Smith PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 346aa275fccSKris Buschelman break; 347aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 34829bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 349bd648c37SKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 350bd648c37SKris Buschelman if (a->bs==4) { 35194ee7fc8SKris Buschelman PetscTruth sse_enabled_local,sse_enabled_global; 352bd648c37SKris Buschelman int ierr; 35394ee7fc8SKris Buschelman 35494ee7fc8SKris Buschelman sse_enabled_local = PETSC_FALSE; 35594ee7fc8SKris Buschelman sse_enabled_global = PETSC_FALSE; 35694ee7fc8SKris Buschelman 35794ee7fc8SKris Buschelman ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 358e64df4b9SKris Buschelman #if defined(PETSC_HAVE_SSE) 35994ee7fc8SKris Buschelman if (sse_enabled_local) { 36054138f6bSKris Buschelman a->single_precision_solves = PETSC_TRUE; 36154138f6bSKris Buschelman A->ops->solve = MatSolve_SeqBAIJ_Update; 36254138f6bSKris Buschelman A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 363cf242676SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 36454138f6bSKris Buschelman break; 36594ee7fc8SKris Buschelman } else { 36694ee7fc8SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 3678661488fSKris Buschelman } 368e64df4b9SKris Buschelman #else 369bd648c37SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 370e64df4b9SKris Buschelman #endif 371bd648c37SKris Buschelman } 372bd648c37SKris Buschelman break; 373aa275fccSKris Buschelman default: 37429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 3752d61bbb3SSatish Balay } 3762d61bbb3SSatish Balay PetscFunctionReturn(0); 3772d61bbb3SSatish Balay } 3782d61bbb3SSatish Balay 3794a2ae208SSatish Balay #undef __FUNCT__ 3804a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 38187828ca2SBarry Smith int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 3822d61bbb3SSatish Balay { 3832d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38482502324SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 3853f1db9ecSBarry Smith MatScalar *aa,*aa_i; 38687828ca2SBarry Smith PetscScalar *v_i; 3872d61bbb3SSatish Balay 3882d61bbb3SSatish Balay PetscFunctionBegin; 3892d61bbb3SSatish Balay bs = a->bs; 3902d61bbb3SSatish Balay ai = a->i; 3912d61bbb3SSatish Balay aj = a->j; 3922d61bbb3SSatish Balay aa = a->a; 3932d61bbb3SSatish Balay bs2 = a->bs2; 3942d61bbb3SSatish Balay 395273d9f13SBarry Smith if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 3962d61bbb3SSatish Balay 3972d61bbb3SSatish Balay bn = row/bs; /* Block number */ 3982d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 3992d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 4002d61bbb3SSatish Balay *nz = bs*M; 4012d61bbb3SSatish Balay 4022d61bbb3SSatish Balay if (v) { 4032d61bbb3SSatish Balay *v = 0; 4042d61bbb3SSatish Balay if (*nz) { 40587828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 4062d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4072d61bbb3SSatish Balay v_i = *v + i*bs; 4082d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 4092d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 4102d61bbb3SSatish Balay } 4112d61bbb3SSatish Balay } 4122d61bbb3SSatish Balay } 4132d61bbb3SSatish Balay 4142d61bbb3SSatish Balay if (idx) { 4152d61bbb3SSatish Balay *idx = 0; 4162d61bbb3SSatish Balay if (*nz) { 417b0a32e0cSBarry Smith ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 4182d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4192d61bbb3SSatish Balay idx_i = *idx + i*bs; 4202d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 4212d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 4222d61bbb3SSatish Balay } 4232d61bbb3SSatish Balay } 4242d61bbb3SSatish Balay } 4252d61bbb3SSatish Balay PetscFunctionReturn(0); 4262d61bbb3SSatish Balay } 4272d61bbb3SSatish Balay 4284a2ae208SSatish Balay #undef __FUNCT__ 4294a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 43087828ca2SBarry Smith int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 4312d61bbb3SSatish Balay { 432606d414cSSatish Balay int ierr; 433606d414cSSatish Balay 4342d61bbb3SSatish Balay PetscFunctionBegin; 435606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 436606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 4372d61bbb3SSatish Balay PetscFunctionReturn(0); 4382d61bbb3SSatish Balay } 4392d61bbb3SSatish Balay 4404a2ae208SSatish Balay #undef __FUNCT__ 4414a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 4422d61bbb3SSatish Balay int MatTranspose_SeqBAIJ(Mat A,Mat *B) 4432d61bbb3SSatish Balay { 4442d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4452d61bbb3SSatish Balay Mat C; 4462d61bbb3SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 447273d9f13SBarry Smith int *rows,*cols,bs2=a->bs2; 44887828ca2SBarry Smith PetscScalar *array; 4492d61bbb3SSatish Balay 4502d61bbb3SSatish Balay PetscFunctionBegin; 45129bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 452b0a32e0cSBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 453549d3d68SSatish Balay ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 4542d61bbb3SSatish Balay 455375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 45687828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 45787828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 458375fe846SBarry Smith #else 4593eda8832SBarry Smith array = a->a; 460375fe846SBarry Smith #endif 461375fe846SBarry Smith 4622d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 463273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 464606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 465b0a32e0cSBarry Smith ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 4662d61bbb3SSatish Balay cols = rows + bs; 4672d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4682d61bbb3SSatish Balay cols[0] = i*bs; 4692d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 4702d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 4712d61bbb3SSatish Balay for (j=0; j<len; j++) { 4722d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 4732d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 4742d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 4752d61bbb3SSatish Balay array += bs2; 4762d61bbb3SSatish Balay } 4772d61bbb3SSatish Balay } 478606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 479375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 480375fe846SBarry Smith ierr = PetscFree(array); 481375fe846SBarry Smith #endif 4822d61bbb3SSatish Balay 4832d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4842d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4852d61bbb3SSatish Balay 486c4992f7dSBarry Smith if (B) { 4872d61bbb3SSatish Balay *B = C; 4882d61bbb3SSatish Balay } else { 489273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 4902d61bbb3SSatish Balay } 4912d61bbb3SSatish Balay PetscFunctionReturn(0); 4922d61bbb3SSatish Balay } 4932d61bbb3SSatish Balay 4944a2ae208SSatish Balay #undef __FUNCT__ 4954a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 496b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 4972593348eSBarry Smith { 498b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 4999df24120SSatish Balay int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 50087828ca2SBarry Smith PetscScalar *aa; 501ce6f0cecSBarry Smith FILE *file; 5022593348eSBarry Smith 5033a40ed3dSBarry Smith PetscFunctionBegin; 504b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 505b0a32e0cSBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 506552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 5073b2fbd54SBarry Smith 508273d9f13SBarry Smith col_lens[1] = A->m; 509273d9f13SBarry Smith col_lens[2] = A->n; 5107e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 5112593348eSBarry Smith 5122593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 513b6490206SBarry Smith count = 0; 514b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 515b6490206SBarry Smith for (j=0; j<bs; j++) { 516b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 517b6490206SBarry Smith } 5182593348eSBarry Smith } 519273d9f13SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 520606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 5212593348eSBarry Smith 5222593348eSBarry Smith /* store column indices (zero start index) */ 523b0a32e0cSBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 524b6490206SBarry Smith count = 0; 525b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 526b6490206SBarry Smith for (j=0; j<bs; j++) { 527b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 528b6490206SBarry Smith for (l=0; l<bs; l++) { 529b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 5302593348eSBarry Smith } 5312593348eSBarry Smith } 532b6490206SBarry Smith } 533b6490206SBarry Smith } 5340752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 535606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 5362593348eSBarry Smith 5372593348eSBarry Smith /* store nonzero values */ 53887828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 539b6490206SBarry Smith count = 0; 540b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 541b6490206SBarry Smith for (j=0; j<bs; j++) { 542b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 543b6490206SBarry Smith for (l=0; l<bs; l++) { 5447e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 545b6490206SBarry Smith } 546b6490206SBarry Smith } 547b6490206SBarry Smith } 548b6490206SBarry Smith } 5490752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 550606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 551ce6f0cecSBarry Smith 552b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 553ce6f0cecSBarry Smith if (file) { 554ce6f0cecSBarry Smith fprintf(file,"-matload_block_size %d\n",a->bs); 555ce6f0cecSBarry Smith } 5563a40ed3dSBarry Smith PetscFunctionReturn(0); 5572593348eSBarry Smith } 5582593348eSBarry Smith 55904929863SHong Zhang extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 56004929863SHong Zhang 5614a2ae208SSatish Balay #undef __FUNCT__ 5624a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 563b0a32e0cSBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 5642593348eSBarry Smith { 565b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 566fb9695e5SSatish Balay int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 567f3ef73ceSBarry Smith PetscViewerFormat format; 5682593348eSBarry Smith 5693a40ed3dSBarry Smith PetscFunctionBegin; 570b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 571fb9695e5SSatish Balay if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 572b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 573fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 574bcd9e38bSBarry Smith Mat aij; 575bcd9e38bSBarry Smith ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 576bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 577bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 57804929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 57904929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 58004929863SHong Zhang ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 58104929863SHong Zhang #endif 58204929863SHong Zhang PetscFunctionReturn(0); 583fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 584b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 58544cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 58644cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 587b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 58844cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 58944cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 590aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 5910e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59262b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 5930e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5940e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59562b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 5960e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5970e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59862b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5990ef38995SBarry Smith } 60044cd7ae7SLois Curfman McInnes #else 6010ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 60262b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 6030ef38995SBarry Smith } 60444cd7ae7SLois Curfman McInnes #endif 60544cd7ae7SLois Curfman McInnes } 60644cd7ae7SLois Curfman McInnes } 607b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 60844cd7ae7SLois Curfman McInnes } 60944cd7ae7SLois Curfman McInnes } 610b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 6110ef38995SBarry Smith } else { 612b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 613b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 614b6490206SBarry Smith for (j=0; j<bs; j++) { 615b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 616b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 617b6490206SBarry Smith for (l=0; l<bs; l++) { 618aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 6190e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 62062b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 6210e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6220e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 62362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 6240e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6250ef38995SBarry Smith } else { 62662b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 62788685aaeSLois Curfman McInnes } 62888685aaeSLois Curfman McInnes #else 62962b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 63088685aaeSLois Curfman McInnes #endif 6312593348eSBarry Smith } 6322593348eSBarry Smith } 633b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 6342593348eSBarry Smith } 6352593348eSBarry Smith } 636b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 637b6490206SBarry Smith } 638b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6393a40ed3dSBarry Smith PetscFunctionReturn(0); 6402593348eSBarry Smith } 6412593348eSBarry Smith 6424a2ae208SSatish Balay #undef __FUNCT__ 6434a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 644b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 6453270192aSSatish Balay { 64677ed5343SBarry Smith Mat A = (Mat) Aa; 6473270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 648b65db4caSBarry Smith int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 6490e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 6503f1db9ecSBarry Smith MatScalar *aa; 651b0a32e0cSBarry Smith PetscViewer viewer; 6523270192aSSatish Balay 6533a40ed3dSBarry Smith PetscFunctionBegin; 6543270192aSSatish Balay 655b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 65677ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 65777ed5343SBarry Smith 658b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 65977ed5343SBarry Smith 6603270192aSSatish Balay /* loop over matrix elements drawing boxes */ 661b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 6623270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6633270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 664273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6653270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6663270192aSSatish Balay aa = a->a + j*bs2; 6673270192aSSatish Balay for (k=0; k<bs; k++) { 6683270192aSSatish Balay for (l=0; l<bs; l++) { 6690e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 670b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6713270192aSSatish Balay } 6723270192aSSatish Balay } 6733270192aSSatish Balay } 6743270192aSSatish Balay } 675b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 6763270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6773270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 678273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6793270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6803270192aSSatish Balay aa = a->a + j*bs2; 6813270192aSSatish Balay for (k=0; k<bs; k++) { 6823270192aSSatish Balay for (l=0; l<bs; l++) { 6830e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 684b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6853270192aSSatish Balay } 6863270192aSSatish Balay } 6873270192aSSatish Balay } 6883270192aSSatish Balay } 6893270192aSSatish Balay 690b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 6913270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6923270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 693273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6943270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6953270192aSSatish Balay aa = a->a + j*bs2; 6963270192aSSatish Balay for (k=0; k<bs; k++) { 6973270192aSSatish Balay for (l=0; l<bs; l++) { 6980e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 699b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 7003270192aSSatish Balay } 7013270192aSSatish Balay } 7023270192aSSatish Balay } 7033270192aSSatish Balay } 70477ed5343SBarry Smith PetscFunctionReturn(0); 70577ed5343SBarry Smith } 7063270192aSSatish Balay 7074a2ae208SSatish Balay #undef __FUNCT__ 7084a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 709b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 71077ed5343SBarry Smith { 7117dce120fSSatish Balay int ierr; 7120e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 713b0a32e0cSBarry Smith PetscDraw draw; 71477ed5343SBarry Smith PetscTruth isnull; 7153270192aSSatish Balay 71677ed5343SBarry Smith PetscFunctionBegin; 71777ed5343SBarry Smith 718b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 719b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 72077ed5343SBarry Smith 72177ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 722273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 72377ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 724b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 725b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 72677ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 7273a40ed3dSBarry Smith PetscFunctionReturn(0); 7283270192aSSatish Balay } 7293270192aSSatish Balay 7304a2ae208SSatish Balay #undef __FUNCT__ 7314a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 732b0a32e0cSBarry Smith int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 7332593348eSBarry Smith { 73419bcc07fSBarry Smith int ierr; 7356831982aSBarry Smith PetscTruth issocket,isascii,isbinary,isdraw; 7362593348eSBarry Smith 7373a40ed3dSBarry Smith PetscFunctionBegin; 738b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 739b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 740fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 741fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 7420f5bd95cSBarry Smith if (issocket) { 74329bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 7440f5bd95cSBarry Smith } else if (isascii){ 7453a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 7460f5bd95cSBarry Smith } else if (isbinary) { 7473a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 7480f5bd95cSBarry Smith } else if (isdraw) { 7493a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 7505cd90555SBarry Smith } else { 75129bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 7522593348eSBarry Smith } 7533a40ed3dSBarry Smith PetscFunctionReturn(0); 7542593348eSBarry Smith } 755b6490206SBarry Smith 756cd0e1443SSatish Balay 7574a2ae208SSatish Balay #undef __FUNCT__ 7584a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 75987828ca2SBarry Smith int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 760cd0e1443SSatish Balay { 761cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7622d61bbb3SSatish Balay int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 7632d61bbb3SSatish Balay int *ai = a->i,*ailen = a->ilen; 7642d61bbb3SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 7653f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 766cd0e1443SSatish Balay 7673a40ed3dSBarry Smith PetscFunctionBegin; 7682d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 769cd0e1443SSatish Balay row = im[k]; brow = row/bs; 77029bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 771273d9f13SBarry Smith if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 7722d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 7732c3acbe9SBarry Smith nrow = ailen[brow]; 7742d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 77529bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 776273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 7772d61bbb3SSatish Balay col = in[l] ; 7782d61bbb3SSatish Balay bcol = col/bs; 7792d61bbb3SSatish Balay cidx = col%bs; 7802d61bbb3SSatish Balay ridx = row%bs; 7812d61bbb3SSatish Balay high = nrow; 7822d61bbb3SSatish Balay low = 0; /* assume unsorted */ 7832d61bbb3SSatish Balay while (high-low > 5) { 784cd0e1443SSatish Balay t = (low+high)/2; 785cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 786cd0e1443SSatish Balay else low = t; 787cd0e1443SSatish Balay } 788cd0e1443SSatish Balay for (i=low; i<high; i++) { 789cd0e1443SSatish Balay if (rp[i] > bcol) break; 790cd0e1443SSatish Balay if (rp[i] == bcol) { 7912d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 7922d61bbb3SSatish Balay goto finished; 793cd0e1443SSatish Balay } 794cd0e1443SSatish Balay } 7952d61bbb3SSatish Balay *v++ = zero; 7962d61bbb3SSatish Balay finished:; 797cd0e1443SSatish Balay } 798cd0e1443SSatish Balay } 7993a40ed3dSBarry Smith PetscFunctionReturn(0); 800cd0e1443SSatish Balay } 801cd0e1443SSatish Balay 80295d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 8034a2ae208SSatish Balay #undef __FUNCT__ 8044a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 80587828ca2SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 80695d5f7c2SBarry Smith { 807563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 808563b5814SBarry Smith int ierr,i,N = m*n*b->bs2; 809563b5814SBarry Smith MatScalar *vsingle; 81095d5f7c2SBarry Smith 81195d5f7c2SBarry Smith PetscFunctionBegin; 812563b5814SBarry Smith if (N > b->setvalueslen) { 813563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 814b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 815563b5814SBarry Smith b->setvalueslen = N; 816563b5814SBarry Smith } 817563b5814SBarry Smith vsingle = b->setvaluescopy; 81895d5f7c2SBarry Smith for (i=0; i<N; i++) { 81995d5f7c2SBarry Smith vsingle[i] = v[i]; 82095d5f7c2SBarry Smith } 82195d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 82295d5f7c2SBarry Smith PetscFunctionReturn(0); 82395d5f7c2SBarry Smith } 82495d5f7c2SBarry Smith #endif 82595d5f7c2SBarry Smith 8262d61bbb3SSatish Balay 8274a2ae208SSatish Balay #undef __FUNCT__ 8284a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 82995d5f7c2SBarry Smith int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 83092c4ed94SBarry Smith { 83192c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8328a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 833273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 834549d3d68SSatish Balay int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 835273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 83695d5f7c2SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 83792c4ed94SBarry Smith 8383a40ed3dSBarry Smith PetscFunctionBegin; 8390e324ae4SSatish Balay if (roworiented) { 8400e324ae4SSatish Balay stepval = (n-1)*bs; 8410e324ae4SSatish Balay } else { 8420e324ae4SSatish Balay stepval = (m-1)*bs; 8430e324ae4SSatish Balay } 84492c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 84592c4ed94SBarry Smith row = im[k]; 8465ef9f2a5SBarry Smith if (row < 0) continue; 847aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 84829bbc08cSBarry Smith if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 84992c4ed94SBarry Smith #endif 85092c4ed94SBarry Smith rp = aj + ai[row]; 85192c4ed94SBarry Smith ap = aa + bs2*ai[row]; 85292c4ed94SBarry Smith rmax = imax[row]; 85392c4ed94SBarry Smith nrow = ailen[row]; 85492c4ed94SBarry Smith low = 0; 85592c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 8565ef9f2a5SBarry Smith if (in[l] < 0) continue; 857aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 85829bbc08cSBarry Smith if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 85992c4ed94SBarry Smith #endif 86092c4ed94SBarry Smith col = in[l]; 86192c4ed94SBarry Smith if (roworiented) { 8620e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 8630e324ae4SSatish Balay } else { 8640e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 86592c4ed94SBarry Smith } 86692c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 86792c4ed94SBarry Smith while (high-low > 7) { 86892c4ed94SBarry Smith t = (low+high)/2; 86992c4ed94SBarry Smith if (rp[t] > col) high = t; 87092c4ed94SBarry Smith else low = t; 87192c4ed94SBarry Smith } 87292c4ed94SBarry Smith for (i=low; i<high; i++) { 87392c4ed94SBarry Smith if (rp[i] > col) break; 87492c4ed94SBarry Smith if (rp[i] == col) { 8758a84c255SSatish Balay bap = ap + bs2*i; 8760e324ae4SSatish Balay if (roworiented) { 8778a84c255SSatish Balay if (is == ADD_VALUES) { 878dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 879dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8808a84c255SSatish Balay bap[jj] += *value++; 881dd9472c6SBarry Smith } 882dd9472c6SBarry Smith } 8830e324ae4SSatish Balay } else { 884dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 885dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8860e324ae4SSatish Balay bap[jj] = *value++; 8878a84c255SSatish Balay } 888dd9472c6SBarry Smith } 889dd9472c6SBarry Smith } 8900e324ae4SSatish Balay } else { 8910e324ae4SSatish Balay if (is == ADD_VALUES) { 892dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 893dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8940e324ae4SSatish Balay *bap++ += *value++; 895dd9472c6SBarry Smith } 896dd9472c6SBarry Smith } 8970e324ae4SSatish Balay } else { 898dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 899dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9000e324ae4SSatish Balay *bap++ = *value++; 9010e324ae4SSatish Balay } 9028a84c255SSatish Balay } 903dd9472c6SBarry Smith } 904dd9472c6SBarry Smith } 905f1241b54SBarry Smith goto noinsert2; 90692c4ed94SBarry Smith } 90792c4ed94SBarry Smith } 90889280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 90929bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91092c4ed94SBarry Smith if (nrow >= rmax) { 91192c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 91292c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 9133f1db9ecSBarry Smith MatScalar *new_a; 91492c4ed94SBarry Smith 91529bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91689280ab3SLois Curfman McInnes 91792c4ed94SBarry Smith /* malloc new storage space */ 9183f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 919b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 92092c4ed94SBarry Smith new_j = (int*)(new_a + bs2*new_nz); 92192c4ed94SBarry Smith new_i = new_j + new_nz; 92292c4ed94SBarry Smith 92392c4ed94SBarry Smith /* copy over old data into new slots */ 92492c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 92592c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 926549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 92792c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 928549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 929549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 930549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 931549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 93292c4ed94SBarry Smith /* free up old matrix storage */ 933606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 934606d414cSSatish Balay if (!a->singlemalloc) { 935606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 936606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 937606d414cSSatish Balay } 93892c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 939c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 94092c4ed94SBarry Smith 94192c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 94292c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 943b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 94492c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 94592c4ed94SBarry Smith a->reallocs++; 94692c4ed94SBarry Smith a->nz++; 94792c4ed94SBarry Smith } 94892c4ed94SBarry Smith N = nrow++ - 1; 94992c4ed94SBarry Smith /* shift up all the later entries in this row */ 95092c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 95192c4ed94SBarry Smith rp[ii+1] = rp[ii]; 952549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 95392c4ed94SBarry Smith } 954549d3d68SSatish Balay if (N >= i) { 955549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 956549d3d68SSatish Balay } 95792c4ed94SBarry Smith rp[i] = col; 9588a84c255SSatish Balay bap = ap + bs2*i; 9590e324ae4SSatish Balay if (roworiented) { 960dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 961dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 9620e324ae4SSatish Balay bap[jj] = *value++; 963dd9472c6SBarry Smith } 964dd9472c6SBarry Smith } 9650e324ae4SSatish Balay } else { 966dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 967dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9680e324ae4SSatish Balay *bap++ = *value++; 9690e324ae4SSatish Balay } 970dd9472c6SBarry Smith } 971dd9472c6SBarry Smith } 972f1241b54SBarry Smith noinsert2:; 97392c4ed94SBarry Smith low = i; 97492c4ed94SBarry Smith } 97592c4ed94SBarry Smith ailen[row] = nrow; 97692c4ed94SBarry Smith } 9773a40ed3dSBarry Smith PetscFunctionReturn(0); 97892c4ed94SBarry Smith } 97992c4ed94SBarry Smith 9804a2ae208SSatish Balay #undef __FUNCT__ 9814a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 9828f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 983584200bdSSatish Balay { 984584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 985584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 986273d9f13SBarry Smith int m = A->m,*ip,N,*ailen = a->ilen; 987549d3d68SSatish Balay int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 9883f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 989a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 990a56a16c8SHong Zhang PetscTruth flag; 991a56a16c8SHong Zhang #endif 992584200bdSSatish Balay 9933a40ed3dSBarry Smith PetscFunctionBegin; 9943a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 995584200bdSSatish Balay 99643ee02c3SBarry Smith if (m) rmax = ailen[0]; 997584200bdSSatish Balay for (i=1; i<mbs; i++) { 998584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 999584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1000d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1001584200bdSSatish Balay if (fshift) { 1002a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1003584200bdSSatish Balay N = ailen[i]; 1004584200bdSSatish Balay for (j=0; j<N; j++) { 1005584200bdSSatish Balay ip[j-fshift] = ip[j]; 1006549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1007584200bdSSatish Balay } 1008584200bdSSatish Balay } 1009584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1010584200bdSSatish Balay } 1011584200bdSSatish Balay if (mbs) { 1012584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1013584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1014584200bdSSatish Balay } 1015584200bdSSatish Balay /* reset ilen and imax for each row */ 1016584200bdSSatish Balay for (i=0; i<mbs; i++) { 1017584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1018584200bdSSatish Balay } 1019a7c10996SSatish Balay a->nz = ai[mbs]; 1020584200bdSSatish Balay 1021584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1022584200bdSSatish Balay if (fshift && a->diag) { 1023606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 1024b424e231SHong Zhang PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1025584200bdSSatish Balay a->diag = 0; 1026584200bdSSatish Balay } 1027bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1028bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1029b0a32e0cSBarry Smith PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1030e2f3b5e9SSatish Balay a->reallocs = 0; 10310e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1032cf4441caSHong Zhang 1033a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 1034a56a16c8SHong Zhang ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1035a56a16c8SHong Zhang if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1036a56a16c8SHong Zhang #endif 1037a56a16c8SHong Zhang 10383a40ed3dSBarry Smith PetscFunctionReturn(0); 1039584200bdSSatish Balay } 1040584200bdSSatish Balay 10415a838052SSatish Balay 1042bea157c4SSatish Balay 1043bea157c4SSatish Balay /* 1044bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1045bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1046bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1047bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1048bea157c4SSatish Balay */ 10494a2ae208SSatish Balay #undef __FUNCT__ 10504a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1051bea157c4SSatish Balay static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1052d9b7c43dSSatish Balay { 1053bea157c4SSatish Balay int i,j,k,row; 1054bea157c4SSatish Balay PetscTruth flg; 10553a40ed3dSBarry Smith 1056433994e6SBarry Smith PetscFunctionBegin; 1057bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1058bea157c4SSatish Balay row = idx[i]; 1059bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1060bea157c4SSatish Balay sizes[j] = 1; 1061bea157c4SSatish Balay i++; 1062e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1063bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1064bea157c4SSatish Balay i++; 1065bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1066bea157c4SSatish Balay flg = PETSC_TRUE; 1067bea157c4SSatish Balay for (k=1; k<bs; k++) { 1068bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1069bea157c4SSatish Balay flg = PETSC_FALSE; 1070bea157c4SSatish Balay break; 1071d9b7c43dSSatish Balay } 1072bea157c4SSatish Balay } 1073bea157c4SSatish Balay if (flg == PETSC_TRUE) { /* No break in the bs */ 1074bea157c4SSatish Balay sizes[j] = bs; 1075bea157c4SSatish Balay i+= bs; 1076bea157c4SSatish Balay } else { 1077bea157c4SSatish Balay sizes[j] = 1; 1078bea157c4SSatish Balay i++; 1079bea157c4SSatish Balay } 1080bea157c4SSatish Balay } 1081bea157c4SSatish Balay } 1082bea157c4SSatish Balay *bs_max = j; 10833a40ed3dSBarry Smith PetscFunctionReturn(0); 1084d9b7c43dSSatish Balay } 1085d9b7c43dSSatish Balay 10864a2ae208SSatish Balay #undef __FUNCT__ 10874a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 108887828ca2SBarry Smith int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1089d9b7c43dSSatish Balay { 1090d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1091b6815fffSBarry Smith int ierr,i,j,k,count,is_n,*is_idx,*rows; 1092bea157c4SSatish Balay int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 109387828ca2SBarry Smith PetscScalar zero = 0.0; 10943f1db9ecSBarry Smith MatScalar *aa; 1095d9b7c43dSSatish Balay 10963a40ed3dSBarry Smith PetscFunctionBegin; 1097d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1098b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1099d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1100d9b7c43dSSatish Balay 1101bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1102b0a32e0cSBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1103bea157c4SSatish Balay sizes = rows + is_n; 1104bea157c4SSatish Balay 1105563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1106bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1107bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1108dffd3267SBarry Smith if (baij->keepzeroedrows) { 1109dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1110dffd3267SBarry Smith bs_max = is_n; 1111dffd3267SBarry Smith } else { 1112bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1113dffd3267SBarry Smith } 1114b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1115bea157c4SSatish Balay 1116bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1117bea157c4SSatish Balay row = rows[j]; 1118273d9f13SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1119bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1120bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1121dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1122bea157c4SSatish Balay if (diag) { 1123bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1124bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1125bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1126bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1127a07cd24cSSatish Balay } 1128563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1129bea157c4SSatish Balay for (k=0; k<bs; k++) { 1130bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1131bea157c4SSatish Balay } 1132bea157c4SSatish Balay } else { /* (!diag) */ 1133bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1134bea157c4SSatish Balay } /* end (!diag) */ 1135bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1136aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 113729bbc08cSBarry Smith if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1138bea157c4SSatish Balay #endif 1139bea157c4SSatish Balay for (k=0; k<count; k++) { 1140d9b7c43dSSatish Balay aa[0] = zero; 1141d9b7c43dSSatish Balay aa += bs; 1142d9b7c43dSSatish Balay } 1143d9b7c43dSSatish Balay if (diag) { 1144f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1145d9b7c43dSSatish Balay } 1146d9b7c43dSSatish Balay } 1147bea157c4SSatish Balay } 1148bea157c4SSatish Balay 1149606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 11509a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11513a40ed3dSBarry Smith PetscFunctionReturn(0); 1152d9b7c43dSSatish Balay } 11531c351548SSatish Balay 11544a2ae208SSatish Balay #undef __FUNCT__ 11554a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 115687828ca2SBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 11572d61bbb3SSatish Balay { 11582d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11592d61bbb3SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1160273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 11612d61bbb3SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1162549d3d68SSatish Balay int ridx,cidx,bs2=a->bs2,ierr; 1163273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 11643f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 11652d61bbb3SSatish Balay 11662d61bbb3SSatish Balay PetscFunctionBegin; 11672d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 11682d61bbb3SSatish Balay row = im[k]; brow = row/bs; 11695ef9f2a5SBarry Smith if (row < 0) continue; 1170aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1171273d9f13SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 11722d61bbb3SSatish Balay #endif 11732d61bbb3SSatish Balay rp = aj + ai[brow]; 11742d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 11752d61bbb3SSatish Balay rmax = imax[brow]; 11762d61bbb3SSatish Balay nrow = ailen[brow]; 11772d61bbb3SSatish Balay low = 0; 11782d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 11795ef9f2a5SBarry Smith if (in[l] < 0) continue; 1180aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1181273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 11822d61bbb3SSatish Balay #endif 11832d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 11842d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 11852d61bbb3SSatish Balay if (roworiented) { 11865ef9f2a5SBarry Smith value = v[l + k*n]; 11872d61bbb3SSatish Balay } else { 11882d61bbb3SSatish Balay value = v[k + l*m]; 11892d61bbb3SSatish Balay } 11902d61bbb3SSatish Balay if (!sorted) low = 0; high = nrow; 11912d61bbb3SSatish Balay while (high-low > 7) { 11922d61bbb3SSatish Balay t = (low+high)/2; 11932d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 11942d61bbb3SSatish Balay else low = t; 11952d61bbb3SSatish Balay } 11962d61bbb3SSatish Balay for (i=low; i<high; i++) { 11972d61bbb3SSatish Balay if (rp[i] > bcol) break; 11982d61bbb3SSatish Balay if (rp[i] == bcol) { 11992d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 12002d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 12012d61bbb3SSatish Balay else *bap = value; 12022d61bbb3SSatish Balay goto noinsert1; 12032d61bbb3SSatish Balay } 12042d61bbb3SSatish Balay } 12052d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 120629bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12072d61bbb3SSatish Balay if (nrow >= rmax) { 12082d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 12092d61bbb3SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 12103f1db9ecSBarry Smith MatScalar *new_a; 12112d61bbb3SSatish Balay 121229bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12132d61bbb3SSatish Balay 12142d61bbb3SSatish Balay /* Malloc new storage space */ 12153f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1216b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 12172d61bbb3SSatish Balay new_j = (int*)(new_a + bs2*new_nz); 12182d61bbb3SSatish Balay new_i = new_j + new_nz; 12192d61bbb3SSatish Balay 12202d61bbb3SSatish Balay /* copy over old data into new slots */ 12212d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 12222d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1223549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 12242d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1225549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1226549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1227549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1228549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 12292d61bbb3SSatish Balay /* free up old matrix storage */ 1230606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1231606d414cSSatish Balay if (!a->singlemalloc) { 1232606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1233606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1234606d414cSSatish Balay } 12352d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1236c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 12372d61bbb3SSatish Balay 12382d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 12392d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1240b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 12412d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 12422d61bbb3SSatish Balay a->reallocs++; 12432d61bbb3SSatish Balay a->nz++; 12442d61bbb3SSatish Balay } 12452d61bbb3SSatish Balay N = nrow++ - 1; 12462d61bbb3SSatish Balay /* shift up all the later entries in this row */ 12472d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 12482d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1249549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 12502d61bbb3SSatish Balay } 1251549d3d68SSatish Balay if (N>=i) { 1252549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1253549d3d68SSatish Balay } 12542d61bbb3SSatish Balay rp[i] = bcol; 12552d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 12562d61bbb3SSatish Balay noinsert1:; 12572d61bbb3SSatish Balay low = i; 12582d61bbb3SSatish Balay } 12592d61bbb3SSatish Balay ailen[brow] = nrow; 12602d61bbb3SSatish Balay } 12612d61bbb3SSatish Balay PetscFunctionReturn(0); 12622d61bbb3SSatish Balay } 12632d61bbb3SSatish Balay 12642d61bbb3SSatish Balay 12654a2ae208SSatish Balay #undef __FUNCT__ 12664a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 12675ef9f2a5SBarry Smith int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 12682d61bbb3SSatish Balay { 12692d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 12702d61bbb3SSatish Balay Mat outA; 12712d61bbb3SSatish Balay int ierr; 1272667159a5SBarry Smith PetscTruth row_identity,col_identity; 12732d61bbb3SSatish Balay 12742d61bbb3SSatish Balay PetscFunctionBegin; 127529bbc08cSBarry Smith if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1276667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1277667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1278667159a5SBarry Smith if (!row_identity || !col_identity) { 127929bbc08cSBarry Smith SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1280667159a5SBarry Smith } 12812d61bbb3SSatish Balay 12822d61bbb3SSatish Balay outA = inA; 12832d61bbb3SSatish Balay inA->factor = FACTOR_LU; 12842d61bbb3SSatish Balay 12852d61bbb3SSatish Balay if (!a->diag) { 1286c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 12872d61bbb3SSatish Balay } 1288cf242676SKris Buschelman 1289c38d4ed2SBarry Smith a->row = row; 1290c38d4ed2SBarry Smith a->col = col; 1291c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1292c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1293c38d4ed2SBarry Smith 1294c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 12954c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1296b0a32e0cSBarry Smith PetscLogObjectParent(inA,a->icol); 1297c38d4ed2SBarry Smith 1298cf242676SKris Buschelman /* 1299cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1300cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1301cf242676SKris Buschelman */ 1302cf242676SKris Buschelman if (a->bs < 8) { 1303cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1304cf242676SKris Buschelman } else { 1305c38d4ed2SBarry Smith if (!a->solve_work) { 130687828ca2SBarry Smith ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 130787828ca2SBarry Smith PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1308c38d4ed2SBarry Smith } 13092d61bbb3SSatish Balay } 13102d61bbb3SSatish Balay 1311667159a5SBarry Smith ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1312667159a5SBarry Smith 13132d61bbb3SSatish Balay PetscFunctionReturn(0); 13142d61bbb3SSatish Balay } 13154a2ae208SSatish Balay #undef __FUNCT__ 13164a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1317ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1318ba4ca20aSSatish Balay { 1319c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1320ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1321d132466eSBarry Smith int ierr; 1322ba4ca20aSSatish Balay 13233a40ed3dSBarry Smith PetscFunctionBegin; 1324c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1325d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1326d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 13273a40ed3dSBarry Smith PetscFunctionReturn(0); 1328ba4ca20aSSatish Balay } 1329d9b7c43dSSatish Balay 1330fb2e594dSBarry Smith EXTERN_C_BEGIN 13314a2ae208SSatish Balay #undef __FUNCT__ 13324a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 133327a8da17SBarry Smith int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 133427a8da17SBarry Smith { 133527a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 133614db4f74SSatish Balay int i,nz,nbs; 133727a8da17SBarry Smith 133827a8da17SBarry Smith PetscFunctionBegin; 133914db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 134014db4f74SSatish Balay nbs = baij->nbs; 134127a8da17SBarry Smith for (i=0; i<nz; i++) { 134227a8da17SBarry Smith baij->j[i] = indices[i]; 134327a8da17SBarry Smith } 134427a8da17SBarry Smith baij->nz = nz; 134514db4f74SSatish Balay for (i=0; i<nbs; i++) { 134627a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 134727a8da17SBarry Smith } 134827a8da17SBarry Smith 134927a8da17SBarry Smith PetscFunctionReturn(0); 135027a8da17SBarry Smith } 1351fb2e594dSBarry Smith EXTERN_C_END 135227a8da17SBarry Smith 13534a2ae208SSatish Balay #undef __FUNCT__ 13544a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 135527a8da17SBarry Smith /*@ 135627a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 135727a8da17SBarry Smith in the matrix. 135827a8da17SBarry Smith 135927a8da17SBarry Smith Input Parameters: 136027a8da17SBarry Smith + mat - the SeqBAIJ matrix 136127a8da17SBarry Smith - indices - the column indices 136227a8da17SBarry Smith 136315091d37SBarry Smith Level: advanced 136415091d37SBarry Smith 136527a8da17SBarry Smith Notes: 136627a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 136727a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 136827a8da17SBarry Smith of the MatSetValues() operation. 136927a8da17SBarry Smith 137027a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 137127a8da17SBarry Smith MatCreateSeqBAIJ(). 137227a8da17SBarry Smith 137327a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 137427a8da17SBarry Smith 137527a8da17SBarry Smith @*/ 137627a8da17SBarry Smith int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 137727a8da17SBarry Smith { 137827a8da17SBarry Smith int ierr,(*f)(Mat,int *); 137927a8da17SBarry Smith 138027a8da17SBarry Smith PetscFunctionBegin; 138127a8da17SBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE); 1382c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 138327a8da17SBarry Smith if (f) { 138427a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 138527a8da17SBarry Smith } else { 138629bbc08cSBarry Smith SETERRQ(1,"Wrong type of matrix to set column indices"); 138727a8da17SBarry Smith } 138827a8da17SBarry Smith PetscFunctionReturn(0); 138927a8da17SBarry Smith } 139027a8da17SBarry Smith 13914a2ae208SSatish Balay #undef __FUNCT__ 13924a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1393273d9f13SBarry Smith int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1394273d9f13SBarry Smith { 1395273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1396273d9f13SBarry Smith int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1397273d9f13SBarry Smith PetscReal atmp; 139887828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1399273d9f13SBarry Smith MatScalar *aa; 1400273d9f13SBarry Smith int ncols,brow,krow,kcol; 1401273d9f13SBarry Smith 1402273d9f13SBarry Smith PetscFunctionBegin; 1403273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1404273d9f13SBarry Smith bs = a->bs; 1405273d9f13SBarry Smith aa = a->a; 1406273d9f13SBarry Smith ai = a->i; 1407273d9f13SBarry Smith aj = a->j; 1408273d9f13SBarry Smith mbs = a->mbs; 1409273d9f13SBarry Smith 1410273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 1411273d9f13SBarry Smith ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1412273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1413273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1414273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1415273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1416273d9f13SBarry Smith brow = bs*i; 1417273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1418273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1419273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1420273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1421273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1422273d9f13SBarry Smith row = brow + krow; /* row index */ 1423273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1424273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1425273d9f13SBarry Smith } 1426273d9f13SBarry Smith } 1427273d9f13SBarry Smith aj++; 1428273d9f13SBarry Smith } 1429273d9f13SBarry Smith } 1430273d9f13SBarry Smith ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1431273d9f13SBarry Smith PetscFunctionReturn(0); 1432273d9f13SBarry Smith } 1433273d9f13SBarry Smith 14344a2ae208SSatish Balay #undef __FUNCT__ 14354a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1436273d9f13SBarry Smith int MatSetUpPreallocation_SeqBAIJ(Mat A) 1437273d9f13SBarry Smith { 1438273d9f13SBarry Smith int ierr; 1439273d9f13SBarry Smith 1440273d9f13SBarry Smith PetscFunctionBegin; 1441273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1442273d9f13SBarry Smith PetscFunctionReturn(0); 1443273d9f13SBarry Smith } 1444273d9f13SBarry Smith 14454a2ae208SSatish Balay #undef __FUNCT__ 14464a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 144787828ca2SBarry Smith int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1448f2a5309cSSatish Balay { 1449f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1450f2a5309cSSatish Balay PetscFunctionBegin; 1451f2a5309cSSatish Balay *array = a->a; 1452f2a5309cSSatish Balay PetscFunctionReturn(0); 1453f2a5309cSSatish Balay } 1454f2a5309cSSatish Balay 14554a2ae208SSatish Balay #undef __FUNCT__ 14564a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 145787828ca2SBarry Smith int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1458f2a5309cSSatish Balay { 1459f2a5309cSSatish Balay PetscFunctionBegin; 1460f2a5309cSSatish Balay PetscFunctionReturn(0); 1461f2a5309cSSatish Balay } 1462f2a5309cSSatish Balay 14632593348eSBarry Smith /* -------------------------------------------------------------------*/ 1464cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1465cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 1466cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 1467cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 1468cc2dc46cSBarry Smith MatMultAdd_SeqBAIJ_N, 14697c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 14707c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 1471cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 1472cc2dc46cSBarry Smith 0, 1473cc2dc46cSBarry Smith 0, 1474cc2dc46cSBarry Smith 0, 1475cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 1476cc2dc46cSBarry Smith 0, 1477b6490206SBarry Smith 0, 1478f2501298SSatish Balay MatTranspose_SeqBAIJ, 1479cc2dc46cSBarry Smith MatGetInfo_SeqBAIJ, 1480cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 1481cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 1482cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 1483cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 1484b6490206SBarry Smith 0, 1485cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 1486cc2dc46cSBarry Smith 0, 1487cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 1488cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 1489cc2dc46cSBarry Smith MatZeroRows_SeqBAIJ, 1490cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 1491cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 1492cc2dc46cSBarry Smith 0, 1493cc2dc46cSBarry Smith 0, 1494273d9f13SBarry Smith MatSetUpPreallocation_SeqBAIJ, 1495cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 1496cc2dc46cSBarry Smith 0, 1497f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 1498f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 14992e8a6d31SBarry Smith MatDuplicate_SeqBAIJ, 1500cc2dc46cSBarry Smith 0, 1501cc2dc46cSBarry Smith 0, 1502cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 1503cc2dc46cSBarry Smith 0, 1504cc2dc46cSBarry Smith 0, 1505cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 1506cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 1507cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 1508cc2dc46cSBarry Smith 0, 1509cc2dc46cSBarry Smith MatPrintHelp_SeqBAIJ, 1510cc2dc46cSBarry Smith MatScale_SeqBAIJ, 1511cc2dc46cSBarry Smith 0, 1512cc2dc46cSBarry Smith 0, 1513cc2dc46cSBarry Smith 0, 1514cc2dc46cSBarry Smith MatGetBlockSize_SeqBAIJ, 15153b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 151692c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 1517cc2dc46cSBarry Smith 0, 1518cc2dc46cSBarry Smith 0, 1519cc2dc46cSBarry Smith 0, 1520cc2dc46cSBarry Smith 0, 1521cc2dc46cSBarry Smith 0, 1522cc2dc46cSBarry Smith 0, 1523d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 1524cc2dc46cSBarry Smith MatGetSubMatrix_SeqBAIJ, 1525b9b97703SBarry Smith MatDestroy_SeqBAIJ, 1526b9b97703SBarry Smith MatView_SeqBAIJ, 15273a64cc32SBarry Smith MatGetPetscMaps_Petsc, 1528273d9f13SBarry Smith 0, 1529273d9f13SBarry Smith 0, 1530273d9f13SBarry Smith 0, 1531273d9f13SBarry Smith 0, 1532273d9f13SBarry Smith 0, 1533273d9f13SBarry Smith 0, 1534273d9f13SBarry Smith MatGetRowMax_SeqBAIJ, 1535273d9f13SBarry Smith MatConvert_Basic}; 15362593348eSBarry Smith 15373e90b805SBarry Smith EXTERN_C_BEGIN 15384a2ae208SSatish Balay #undef __FUNCT__ 15394a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 15403e90b805SBarry Smith int MatStoreValues_SeqBAIJ(Mat mat) 15413e90b805SBarry Smith { 15423e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1543273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1544d132466eSBarry Smith int ierr; 15453e90b805SBarry Smith 15463e90b805SBarry Smith PetscFunctionBegin; 15473e90b805SBarry Smith if (aij->nonew != 1) { 154829bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15493e90b805SBarry Smith } 15503e90b805SBarry Smith 15513e90b805SBarry Smith /* allocate space for values if not already there */ 15523e90b805SBarry Smith if (!aij->saved_values) { 155387828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 15543e90b805SBarry Smith } 15553e90b805SBarry Smith 15563e90b805SBarry Smith /* copy values over */ 155787828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15583e90b805SBarry Smith PetscFunctionReturn(0); 15593e90b805SBarry Smith } 15603e90b805SBarry Smith EXTERN_C_END 15613e90b805SBarry Smith 15623e90b805SBarry Smith EXTERN_C_BEGIN 15634a2ae208SSatish Balay #undef __FUNCT__ 15644a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 156532e87ba3SBarry Smith int MatRetrieveValues_SeqBAIJ(Mat mat) 15663e90b805SBarry Smith { 15673e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1568273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 15693e90b805SBarry Smith 15703e90b805SBarry Smith PetscFunctionBegin; 15713e90b805SBarry Smith if (aij->nonew != 1) { 157229bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15733e90b805SBarry Smith } 15743e90b805SBarry Smith if (!aij->saved_values) { 157529bbc08cSBarry Smith SETERRQ(1,"Must call MatStoreValues(A);first"); 15763e90b805SBarry Smith } 15773e90b805SBarry Smith 15783e90b805SBarry Smith /* copy values over */ 157987828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15803e90b805SBarry Smith PetscFunctionReturn(0); 15813e90b805SBarry Smith } 15823e90b805SBarry Smith EXTERN_C_END 15833e90b805SBarry Smith 1584273d9f13SBarry Smith EXTERN_C_BEGIN 1585273d9f13SBarry Smith extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1586273d9f13SBarry Smith EXTERN_C_END 1587273d9f13SBarry Smith 1588273d9f13SBarry Smith EXTERN_C_BEGIN 15894a2ae208SSatish Balay #undef __FUNCT__ 15904a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 1591273d9f13SBarry Smith int MatCreate_SeqBAIJ(Mat B) 15922593348eSBarry Smith { 1593273d9f13SBarry Smith int ierr,size; 1594b6490206SBarry Smith Mat_SeqBAIJ *b; 15953b2fbd54SBarry Smith 15963a40ed3dSBarry Smith PetscFunctionBegin; 1597273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 159829bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1599b6490206SBarry Smith 1600273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 1601273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 1602b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1603b0a32e0cSBarry Smith B->data = (void*)b; 1604549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1605549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 16062593348eSBarry Smith B->factor = 0; 16072593348eSBarry Smith B->lupivotthreshold = 1.0; 160890f02eecSBarry Smith B->mapping = 0; 16092593348eSBarry Smith b->row = 0; 16102593348eSBarry Smith b->col = 0; 1611e51c0b9cSSatish Balay b->icol = 0; 16122593348eSBarry Smith b->reallocs = 0; 16133e90b805SBarry Smith b->saved_values = 0; 1614cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1615563b5814SBarry Smith b->setvalueslen = 0; 1616563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 1617563b5814SBarry Smith #endif 16188661488fSKris Buschelman b->single_precision_solves = PETSC_FALSE; 16192593348eSBarry Smith 16203a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 16213a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1622a5ae1ecdSBarry Smith 1623c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 1624c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 16252593348eSBarry Smith b->nonew = 0; 16262593348eSBarry Smith b->diag = 0; 16272593348eSBarry Smith b->solve_work = 0; 1628de6a44a3SBarry Smith b->mult_work = 0; 16292a1b7f2aSHong Zhang B->spptr = 0; 16300e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 1631883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 16324e220ebcSLois Curfman McInnes 1633f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 16343e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 1635bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1636f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 16373e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 1638bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1639f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 164027a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1641bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1642273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1643273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 1644273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 16453a40ed3dSBarry Smith PetscFunctionReturn(0); 16462593348eSBarry Smith } 1647273d9f13SBarry Smith EXTERN_C_END 16482593348eSBarry Smith 16494a2ae208SSatish Balay #undef __FUNCT__ 16504a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 16512e8a6d31SBarry Smith int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 16522593348eSBarry Smith { 16532593348eSBarry Smith Mat C; 1654b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 165527a8da17SBarry Smith int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1656de6a44a3SBarry Smith 16573a40ed3dSBarry Smith PetscFunctionBegin; 165829bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 16592593348eSBarry Smith 16602593348eSBarry Smith *B = 0; 1661273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1662273d9f13SBarry Smith ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1663273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 166444cd7ae7SLois Curfman McInnes 1665b6490206SBarry Smith c->bs = a->bs; 16669df24120SSatish Balay c->bs2 = a->bs2; 1667b6490206SBarry Smith c->mbs = a->mbs; 1668b6490206SBarry Smith c->nbs = a->nbs; 166935d8aa7fSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 16702593348eSBarry Smith 1671b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1672b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1673b6490206SBarry Smith for (i=0; i<mbs; i++) { 16742593348eSBarry Smith c->imax[i] = a->imax[i]; 16752593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16762593348eSBarry Smith } 16772593348eSBarry Smith 16782593348eSBarry Smith /* allocate the matrix space */ 1679c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 16803f1db9ecSBarry Smith len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1681b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 16827e67e3f9SSatish Balay c->j = (int*)(c->a + nz*bs2); 1683de6a44a3SBarry Smith c->i = c->j + nz; 1684549d3d68SSatish Balay ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1685b6490206SBarry Smith if (mbs > 0) { 1686549d3d68SSatish Balay ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 16872e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 1688549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16892e8a6d31SBarry Smith } else { 1690549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16912593348eSBarry Smith } 16922593348eSBarry Smith } 16932593348eSBarry Smith 1694b0a32e0cSBarry Smith PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 16952593348eSBarry Smith c->sorted = a->sorted; 16962593348eSBarry Smith c->roworiented = a->roworiented; 16972593348eSBarry Smith c->nonew = a->nonew; 16982593348eSBarry Smith 16992593348eSBarry Smith if (a->diag) { 1700b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1701b0a32e0cSBarry Smith PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1702b6490206SBarry Smith for (i=0; i<mbs; i++) { 17032593348eSBarry Smith c->diag[i] = a->diag[i]; 17042593348eSBarry Smith } 170598305bb5SBarry Smith } else c->diag = 0; 17062593348eSBarry Smith c->nz = a->nz; 17072593348eSBarry Smith c->maxnz = a->maxnz; 17082593348eSBarry Smith c->solve_work = 0; 17092a1b7f2aSHong Zhang C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17107fc0212eSBarry Smith c->mult_work = 0; 1711273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 1712273d9f13SBarry Smith C->assembled = PETSC_TRUE; 17132593348eSBarry Smith *B = C; 1714b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 17153a40ed3dSBarry Smith PetscFunctionReturn(0); 17162593348eSBarry Smith } 17172593348eSBarry Smith 1718273d9f13SBarry Smith EXTERN_C_BEGIN 17194a2ae208SSatish Balay #undef __FUNCT__ 17204a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 1721b0a32e0cSBarry Smith int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 17222593348eSBarry Smith { 1723b6490206SBarry Smith Mat_SeqBAIJ *a; 17242593348eSBarry Smith Mat B; 1725f1af5d2fSBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1726b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 172735aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1728a7c10996SSatish Balay int *masked,nmask,tmp,bs2,ishift; 172987828ca2SBarry Smith PetscScalar *aa; 173019bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 17312593348eSBarry Smith 17323a40ed3dSBarry Smith PetscFunctionBegin; 1733b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1734de6a44a3SBarry Smith bs2 = bs*bs; 1735b6490206SBarry Smith 1736d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 173729bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1738b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 17390752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1740552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 17412593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17422593348eSBarry Smith 1743d64ed03dSBarry Smith if (header[3] < 0) { 174429bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1745d64ed03dSBarry Smith } 1746d64ed03dSBarry Smith 174729bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 174835aab85fSBarry Smith 174935aab85fSBarry Smith /* 175035aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 175135aab85fSBarry Smith divisible by the blocksize 175235aab85fSBarry Smith */ 1753b6490206SBarry Smith mbs = M/bs; 175435aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 175535aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 175635aab85fSBarry Smith else mbs++; 175735aab85fSBarry Smith if (extra_rows) { 1758b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 175935aab85fSBarry Smith } 1760b6490206SBarry Smith 17612593348eSBarry Smith /* read in row lengths */ 1762b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 17630752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 176435aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 17652593348eSBarry Smith 1766b6490206SBarry Smith /* read in column indices */ 1767b0a32e0cSBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 17680752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 176935aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1770b6490206SBarry Smith 1771b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1772b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1773549d3d68SSatish Balay ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1774b0a32e0cSBarry Smith ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1775549d3d68SSatish Balay ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 177635aab85fSBarry Smith masked = mask + mbs; 1777b6490206SBarry Smith rowcount = 0; nzcount = 0; 1778b6490206SBarry Smith for (i=0; i<mbs; i++) { 177935aab85fSBarry Smith nmask = 0; 1780b6490206SBarry Smith for (j=0; j<bs; j++) { 1781b6490206SBarry Smith kmax = rowlengths[rowcount]; 1782b6490206SBarry Smith for (k=0; k<kmax; k++) { 178335aab85fSBarry Smith tmp = jj[nzcount++]/bs; 178435aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1785b6490206SBarry Smith } 1786b6490206SBarry Smith rowcount++; 1787b6490206SBarry Smith } 178835aab85fSBarry Smith browlengths[i] += nmask; 178935aab85fSBarry Smith /* zero out the mask elements we set */ 179035aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1791b6490206SBarry Smith } 1792b6490206SBarry Smith 17932593348eSBarry Smith /* create our matrix */ 17943eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 17952593348eSBarry Smith B = *A; 1796b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 17972593348eSBarry Smith 17982593348eSBarry Smith /* set matrix "i" values */ 1799de6a44a3SBarry Smith a->i[0] = 0; 1800b6490206SBarry Smith for (i=1; i<= mbs; i++) { 1801b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1802b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 18032593348eSBarry Smith } 1804b6490206SBarry Smith a->nz = 0; 1805b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 18062593348eSBarry Smith 1807b6490206SBarry Smith /* read in nonzero values */ 180887828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 18090752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 181035aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1811b6490206SBarry Smith 1812b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1813b6490206SBarry Smith nzcount = 0; jcount = 0; 1814b6490206SBarry Smith for (i=0; i<mbs; i++) { 1815b6490206SBarry Smith nzcountb = nzcount; 181635aab85fSBarry Smith nmask = 0; 1817b6490206SBarry Smith for (j=0; j<bs; j++) { 1818b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1819b6490206SBarry Smith for (k=0; k<kmax; k++) { 182035aab85fSBarry Smith tmp = jj[nzcount++]/bs; 182135aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1822b6490206SBarry Smith } 1823b6490206SBarry Smith } 1824de6a44a3SBarry Smith /* sort the masked values */ 1825433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1826de6a44a3SBarry Smith 1827b6490206SBarry Smith /* set "j" values into matrix */ 1828b6490206SBarry Smith maskcount = 1; 182935aab85fSBarry Smith for (j=0; j<nmask; j++) { 183035aab85fSBarry Smith a->j[jcount++] = masked[j]; 1831de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1832b6490206SBarry Smith } 1833b6490206SBarry Smith /* set "a" values into matrix */ 1834de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1835b6490206SBarry Smith for (j=0; j<bs; j++) { 1836b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1837b6490206SBarry Smith for (k=0; k<kmax; k++) { 1838de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1839de6a44a3SBarry Smith block = mask[tmp] - 1; 1840de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1841de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1842375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 1843b6490206SBarry Smith } 1844b6490206SBarry Smith } 184535aab85fSBarry Smith /* zero out the mask elements we set */ 184635aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1847b6490206SBarry Smith } 184829bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1849b6490206SBarry Smith 1850606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1851606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 1852606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1853606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 1854606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 1855b6490206SBarry Smith 1856b6490206SBarry Smith B->assembled = PETSC_TRUE; 1857de6a44a3SBarry Smith 18589c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 18593a40ed3dSBarry Smith PetscFunctionReturn(0); 18602593348eSBarry Smith } 1861273d9f13SBarry Smith EXTERN_C_END 18622593348eSBarry Smith 18634a2ae208SSatish Balay #undef __FUNCT__ 18644a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 1865273d9f13SBarry Smith /*@C 1866273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1867273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 1868273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1869273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1870273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 18712593348eSBarry Smith 1872273d9f13SBarry Smith Collective on MPI_Comm 1873273d9f13SBarry Smith 1874273d9f13SBarry Smith Input Parameters: 1875273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1876273d9f13SBarry Smith . bs - size of block 1877273d9f13SBarry Smith . m - number of rows 1878273d9f13SBarry Smith . n - number of columns 187935d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 188035d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 1881273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1882273d9f13SBarry Smith 1883273d9f13SBarry Smith Output Parameter: 1884273d9f13SBarry Smith . A - the matrix 1885273d9f13SBarry Smith 1886273d9f13SBarry Smith Options Database Keys: 1887273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1888273d9f13SBarry Smith block calculations (much slower) 1889273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1890273d9f13SBarry Smith 1891273d9f13SBarry Smith Level: intermediate 1892273d9f13SBarry Smith 1893273d9f13SBarry Smith Notes: 189435d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 189535d8aa7fSBarry Smith 1896273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1897273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1898273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1899273d9f13SBarry Smith 1900273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1901273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1902273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1903273d9f13SBarry Smith matrices. 1904273d9f13SBarry Smith 1905273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1906273d9f13SBarry Smith @*/ 1907273d9f13SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1908273d9f13SBarry Smith { 1909273d9f13SBarry Smith int ierr; 1910273d9f13SBarry Smith 1911273d9f13SBarry Smith PetscFunctionBegin; 1912273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1913273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1914273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1915273d9f13SBarry Smith PetscFunctionReturn(0); 1916273d9f13SBarry Smith } 1917273d9f13SBarry Smith 19184a2ae208SSatish Balay #undef __FUNCT__ 19194a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1920273d9f13SBarry Smith /*@C 1921273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1922273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 1923273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1924273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1925273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 1926273d9f13SBarry Smith 1927273d9f13SBarry Smith Collective on MPI_Comm 1928273d9f13SBarry Smith 1929273d9f13SBarry Smith Input Parameters: 1930273d9f13SBarry Smith + A - the matrix 1931273d9f13SBarry Smith . bs - size of block 1932273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1933273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 1934273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1935273d9f13SBarry Smith 1936273d9f13SBarry Smith Options Database Keys: 1937273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1938273d9f13SBarry Smith block calculations (much slower) 1939273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1940273d9f13SBarry Smith 1941273d9f13SBarry Smith Level: intermediate 1942273d9f13SBarry Smith 1943273d9f13SBarry Smith Notes: 1944273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1945273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1946273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1947273d9f13SBarry Smith 1948273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1949273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1950273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1951273d9f13SBarry Smith matrices. 1952273d9f13SBarry Smith 1953273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1954273d9f13SBarry Smith @*/ 1955273d9f13SBarry Smith int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1956273d9f13SBarry Smith { 1957273d9f13SBarry Smith Mat_SeqBAIJ *b; 1958273d9f13SBarry Smith int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1959273d9f13SBarry Smith PetscTruth flg; 1960273d9f13SBarry Smith 1961273d9f13SBarry Smith PetscFunctionBegin; 1962273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1963273d9f13SBarry Smith if (!flg) PetscFunctionReturn(0); 1964273d9f13SBarry Smith 1965273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 1966b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1967273d9f13SBarry Smith if (nnz && newbs != bs) { 1968273d9f13SBarry Smith SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1969273d9f13SBarry Smith } 1970273d9f13SBarry Smith bs = newbs; 1971273d9f13SBarry Smith 1972273d9f13SBarry Smith mbs = B->m/bs; 1973273d9f13SBarry Smith nbs = B->n/bs; 1974273d9f13SBarry Smith bs2 = bs*bs; 1975273d9f13SBarry Smith 1976273d9f13SBarry Smith if (mbs*bs!=B->m || nbs*bs!=B->n) { 197735d8aa7fSBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1978273d9f13SBarry Smith } 1979273d9f13SBarry Smith 1980435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1981435da068SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1982273d9f13SBarry Smith if (nnz) { 1983273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1984273d9f13SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 19853a7fca6bSBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1986273d9f13SBarry Smith } 1987273d9f13SBarry Smith } 1988273d9f13SBarry Smith 1989273d9f13SBarry Smith b = (Mat_SeqBAIJ*)B->data; 1990b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 199154138f6bSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 199254138f6bSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1993273d9f13SBarry Smith if (!flg) { 1994273d9f13SBarry Smith switch (bs) { 1995273d9f13SBarry Smith case 1: 1996273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1997273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_1; 1998273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_1; 1999273d9f13SBarry Smith break; 2000273d9f13SBarry Smith case 2: 2001273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2002273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_2; 2003273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2004273d9f13SBarry Smith break; 2005273d9f13SBarry Smith case 3: 2006273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2007273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_3; 2008273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2009273d9f13SBarry Smith break; 2010273d9f13SBarry Smith case 4: 2011273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2012273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_4; 2013273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2014273d9f13SBarry Smith break; 2015273d9f13SBarry Smith case 5: 2016273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2017273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_5; 2018273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2019273d9f13SBarry Smith break; 2020273d9f13SBarry Smith case 6: 2021273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2022273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_6; 2023273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2024273d9f13SBarry Smith break; 2025273d9f13SBarry Smith case 7: 202654138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2027273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_7; 2028273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2029273d9f13SBarry Smith break; 2030273d9f13SBarry Smith default: 203154138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2032273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2033273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2034273d9f13SBarry Smith break; 2035273d9f13SBarry Smith } 2036273d9f13SBarry Smith } 2037273d9f13SBarry Smith b->bs = bs; 2038273d9f13SBarry Smith b->mbs = mbs; 2039273d9f13SBarry Smith b->nbs = nbs; 2040b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2041273d9f13SBarry Smith if (!nnz) { 2042435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2043273d9f13SBarry Smith else if (nz <= 0) nz = 1; 2044273d9f13SBarry Smith for (i=0; i<mbs; i++) b->imax[i] = nz; 2045273d9f13SBarry Smith nz = nz*mbs; 2046273d9f13SBarry Smith } else { 2047273d9f13SBarry Smith nz = 0; 2048273d9f13SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2049273d9f13SBarry Smith } 2050273d9f13SBarry Smith 2051273d9f13SBarry Smith /* allocate the matrix space */ 2052273d9f13SBarry Smith len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2053b0a32e0cSBarry Smith ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2054273d9f13SBarry Smith ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2055273d9f13SBarry Smith b->j = (int*)(b->a + nz*bs2); 2056273d9f13SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2057273d9f13SBarry Smith b->i = b->j + nz; 2058273d9f13SBarry Smith b->singlemalloc = PETSC_TRUE; 2059273d9f13SBarry Smith 2060273d9f13SBarry Smith b->i[0] = 0; 2061273d9f13SBarry Smith for (i=1; i<mbs+1; i++) { 2062273d9f13SBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 2063273d9f13SBarry Smith } 2064273d9f13SBarry Smith 2065273d9f13SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 206616d6aa21SSatish Balay ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2067b0a32e0cSBarry Smith PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2068273d9f13SBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2069273d9f13SBarry Smith 2070273d9f13SBarry Smith b->bs = bs; 2071273d9f13SBarry Smith b->bs2 = bs2; 2072273d9f13SBarry Smith b->mbs = mbs; 2073273d9f13SBarry Smith b->nz = 0; 2074273d9f13SBarry Smith b->maxnz = nz*bs2; 2075273d9f13SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2076273d9f13SBarry Smith PetscFunctionReturn(0); 2077273d9f13SBarry Smith } 20782593348eSBarry Smith 2079