1bba1ac68SSatish Balay /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 22593348eSBarry Smith 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 91a6a6d98SBarry Smith #include "src/inline/spops.h" 10325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 113b547af2SSatish Balay 12af674e45SBarry Smith /* 13af674e45SBarry Smith Special version for Fun3d sequential benchmark 14af674e45SBarry Smith */ 15af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 16af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 19af674e45SBarry Smith #endif 20af674e45SBarry Smith 21af674e45SBarry Smith #undef __FUNCT__ 22af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 234bb09213Spetsc void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24af674e45SBarry Smith { 25af674e45SBarry Smith Mat A = *AA; 26af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27a037b02bSBarry Smith int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 2838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 29a037b02bSBarry Smith int *aj=a->j,stepval; 304bb09213Spetsc PetscScalar *value = v; 314bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 32af674e45SBarry Smith 33af674e45SBarry Smith PetscFunctionBegin; 34af674e45SBarry Smith stepval = (n-1)*4; 35af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 36af674e45SBarry Smith row = im[k]; 37af674e45SBarry Smith rp = aj + ai[row]; 38af674e45SBarry Smith ap = aa + 16*ai[row]; 39af674e45SBarry Smith nrow = ailen[row]; 40af674e45SBarry Smith low = 0; 41af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 42af674e45SBarry Smith col = in[l]; 43af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 44af674e45SBarry Smith low = 0; high = nrow; 45af674e45SBarry Smith while (high-low > 7) { 46af674e45SBarry Smith t = (low+high)/2; 47af674e45SBarry Smith if (rp[t] > col) high = t; 48af674e45SBarry Smith else low = t; 49af674e45SBarry Smith } 50af674e45SBarry Smith for (i=low; i<high; i++) { 51af674e45SBarry Smith if (rp[i] > col) break; 52af674e45SBarry Smith if (rp[i] == col) { 53af674e45SBarry Smith bap = ap + 16*i; 54af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 55af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 56af674e45SBarry Smith bap[jj] += *value++; 57af674e45SBarry Smith } 58af674e45SBarry Smith } 59af674e45SBarry Smith goto noinsert2; 60af674e45SBarry Smith } 61af674e45SBarry Smith } 62af674e45SBarry Smith N = nrow++ - 1; 63af674e45SBarry Smith /* shift up all the later entries in this row */ 64af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 65af674e45SBarry Smith rp[ii+1] = rp[ii]; 66a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67af674e45SBarry Smith } 68af674e45SBarry Smith if (N >= i) { 69a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70af674e45SBarry Smith } 71af674e45SBarry Smith rp[i] = col; 72af674e45SBarry Smith bap = ap + 16*i; 73af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 74af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 75af674e45SBarry Smith bap[jj] = *value++; 76af674e45SBarry Smith } 77af674e45SBarry Smith } 78af674e45SBarry Smith noinsert2:; 79af674e45SBarry Smith low = i; 80af674e45SBarry Smith } 81af674e45SBarry Smith ailen[row] = nrow; 82af674e45SBarry Smith } 83af674e45SBarry Smith } 84af674e45SBarry Smith 85af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 86af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 87af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 89af674e45SBarry Smith #endif 90af674e45SBarry Smith 91af674e45SBarry Smith #undef __FUNCT__ 92af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 934bb09213Spetsc void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94af674e45SBarry Smith { 95af674e45SBarry Smith Mat A = *AA; 96af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97a037b02bSBarry Smith int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 9838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 99af674e45SBarry Smith int *aj=a->j,brow,bcol; 10038fde467SHong Zhang int ridx,cidx; 101af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 102af674e45SBarry Smith 103af674e45SBarry Smith PetscFunctionBegin; 104af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 105af674e45SBarry Smith row = im[k]; brow = row/4; 106af674e45SBarry Smith rp = aj + ai[brow]; 107af674e45SBarry Smith ap = aa + 16*ai[brow]; 108af674e45SBarry Smith nrow = ailen[brow]; 109af674e45SBarry Smith low = 0; 110af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 111af674e45SBarry Smith col = in[l]; bcol = col/4; 112af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 113af674e45SBarry Smith value = v[l + k*n]; 114af674e45SBarry Smith low = 0; high = nrow; 115af674e45SBarry Smith while (high-low > 7) { 116af674e45SBarry Smith t = (low+high)/2; 117af674e45SBarry Smith if (rp[t] > bcol) high = t; 118af674e45SBarry Smith else low = t; 119af674e45SBarry Smith } 120af674e45SBarry Smith for (i=low; i<high; i++) { 121af674e45SBarry Smith if (rp[i] > bcol) break; 122af674e45SBarry Smith if (rp[i] == bcol) { 123af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 124af674e45SBarry Smith *bap += value; 125af674e45SBarry Smith goto noinsert1; 126af674e45SBarry Smith } 127af674e45SBarry Smith } 128af674e45SBarry Smith N = nrow++ - 1; 129af674e45SBarry Smith /* shift up all the later entries in this row */ 130af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 131af674e45SBarry Smith rp[ii+1] = rp[ii]; 132a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133af674e45SBarry Smith } 134af674e45SBarry Smith if (N>=i) { 135a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136af674e45SBarry Smith } 137af674e45SBarry Smith rp[i] = bcol; 138af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 139af674e45SBarry Smith noinsert1:; 140af674e45SBarry Smith low = i; 141af674e45SBarry Smith } 142af674e45SBarry Smith ailen[brow] = nrow; 143af674e45SBarry Smith } 144af674e45SBarry Smith } 145af674e45SBarry Smith 14695d5f7c2SBarry Smith /* UGLY, ugly, ugly 14787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1483477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 14995d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 15095d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 15195d5f7c2SBarry Smith into the single precision data structures. 15295d5f7c2SBarry Smith */ 15395d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 154ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 15595d5f7c2SBarry Smith #else 15695d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 15795d5f7c2SBarry Smith #endif 15804929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 15904929863SHong Zhang EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 16004929863SHong Zhang #endif 16195d5f7c2SBarry Smith 1622d61bbb3SSatish Balay #define CHUNKSIZE 10 163de6a44a3SBarry Smith 164be5855fcSBarry Smith /* 165be5855fcSBarry Smith Checks for missing diagonals 166be5855fcSBarry Smith */ 1674a2ae208SSatish Balay #undef __FUNCT__ 1684a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169c4992f7dSBarry Smith int MatMissingDiagonal_SeqBAIJ(Mat A) 170be5855fcSBarry Smith { 171be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172883fce79SBarry Smith int *diag,*jj = a->j,i,ierr; 173be5855fcSBarry Smith 174be5855fcSBarry Smith PetscFunctionBegin; 175c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176883fce79SBarry Smith diag = a->diag; 1770e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 178be5855fcSBarry Smith if (jj[diag[i]] != i) { 17929bbc08cSBarry Smith SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180be5855fcSBarry Smith } 181be5855fcSBarry Smith } 182be5855fcSBarry Smith PetscFunctionReturn(0); 183be5855fcSBarry Smith } 184be5855fcSBarry Smith 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187c4992f7dSBarry Smith int MatMarkDiagonal_SeqBAIJ(Mat A) 188de6a44a3SBarry Smith { 189de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 19082502324SSatish Balay int i,j,*diag,m = a->mbs,ierr; 191de6a44a3SBarry Smith 1923a40ed3dSBarry Smith PetscFunctionBegin; 193883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 194883fce79SBarry Smith 195b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196b0a32e0cSBarry Smith PetscLogObjectMemory(A,(m+1)*sizeof(int)); 1977fc0212eSBarry Smith for (i=0; i<m; i++) { 198ecc615b2SBarry Smith diag[i] = a->i[i+1]; 199de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 200de6a44a3SBarry Smith if (a->j[j] == i) { 201de6a44a3SBarry Smith diag[i] = j; 202de6a44a3SBarry Smith break; 203de6a44a3SBarry Smith } 204de6a44a3SBarry Smith } 205de6a44a3SBarry Smith } 206de6a44a3SBarry Smith a->diag = diag; 2073a40ed3dSBarry Smith PetscFunctionReturn(0); 208de6a44a3SBarry Smith } 2092593348eSBarry Smith 2102593348eSBarry Smith 211ca44d042SBarry Smith EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 2123b2fbd54SBarry Smith 2134a2ae208SSatish Balay #undef __FUNCT__ 2144a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 2150e6d2581SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2163b2fbd54SBarry Smith { 2173b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2183b2fbd54SBarry Smith int ierr,n = a->mbs,i; 2193b2fbd54SBarry Smith 2203a40ed3dSBarry Smith PetscFunctionBegin; 2213b2fbd54SBarry Smith *nn = n; 2223a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2233b2fbd54SBarry Smith if (symmetric) { 2243b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 2253b2fbd54SBarry Smith } else if (oshift == 1) { 2263b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 227435da068SBarry Smith int nz = a->i[n]; 2283b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 2293b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 2303b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2313b2fbd54SBarry Smith } else { 2323b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2333b2fbd54SBarry Smith } 2343b2fbd54SBarry Smith 2353a40ed3dSBarry Smith PetscFunctionReturn(0); 2363b2fbd54SBarry Smith } 2373b2fbd54SBarry Smith 2384a2ae208SSatish Balay #undef __FUNCT__ 2394a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240435da068SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2413b2fbd54SBarry Smith { 2423b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243606d414cSSatish Balay int i,n = a->mbs,ierr; 2443b2fbd54SBarry Smith 2453a40ed3dSBarry Smith PetscFunctionBegin; 2463a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2473b2fbd54SBarry Smith if (symmetric) { 248606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 249606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 250af5da2bfSBarry Smith } else if (oshift == 1) { 251435da068SBarry Smith int nz = a->i[n]-1; 2523b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 2533b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 2543b2fbd54SBarry Smith } 2553a40ed3dSBarry Smith PetscFunctionReturn(0); 2563b2fbd54SBarry Smith } 2573b2fbd54SBarry Smith 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 2602d61bbb3SSatish Balay int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 2612d61bbb3SSatish Balay { 2622d61bbb3SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2632d61bbb3SSatish Balay 2642d61bbb3SSatish Balay PetscFunctionBegin; 2652d61bbb3SSatish Balay *bs = baij->bs; 2662d61bbb3SSatish Balay PetscFunctionReturn(0); 2672d61bbb3SSatish Balay } 2682d61bbb3SSatish Balay 2692d61bbb3SSatish Balay 2704a2ae208SSatish Balay #undef __FUNCT__ 2714a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 272e1311b90SBarry Smith int MatDestroy_SeqBAIJ(Mat A) 2732d61bbb3SSatish Balay { 2742d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275e51c0b9cSSatish Balay int ierr; 2762d61bbb3SSatish Balay 277433994e6SBarry Smith PetscFunctionBegin; 278aa482453SBarry Smith #if defined(PETSC_USE_LOG) 279b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 2802d61bbb3SSatish Balay #endif 281606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 282606d414cSSatish Balay if (!a->singlemalloc) { 283606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 284606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 285606d414cSSatish Balay } 286c38d4ed2SBarry Smith if (a->row) { 287c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 288c38d4ed2SBarry Smith } 289c38d4ed2SBarry Smith if (a->col) { 290c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 291c38d4ed2SBarry Smith } 292606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 300563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301563b5814SBarry Smith #endif 302606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 3032d61bbb3SSatish Balay PetscFunctionReturn(0); 3042d61bbb3SSatish Balay } 3052d61bbb3SSatish Balay 3064a2ae208SSatish Balay #undef __FUNCT__ 3074a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 3082d61bbb3SSatish Balay int MatSetOption_SeqBAIJ(Mat A,MatOption op) 3092d61bbb3SSatish Balay { 3102d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3112d61bbb3SSatish Balay 3122d61bbb3SSatish Balay PetscFunctionBegin; 313aa275fccSKris Buschelman switch (op) { 314aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 315aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 316aa275fccSKris Buschelman break; 317aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 318aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 319aa275fccSKris Buschelman break; 320aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 321aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 322aa275fccSKris Buschelman break; 323aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 324aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 325aa275fccSKris Buschelman break; 326aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 327aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 328aa275fccSKris Buschelman break; 329aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 330aa275fccSKris Buschelman a->nonew = 1; 331aa275fccSKris Buschelman break; 332aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 333aa275fccSKris Buschelman a->nonew = -1; 334aa275fccSKris Buschelman break; 335aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 336aa275fccSKris Buschelman a->nonew = -2; 337aa275fccSKris Buschelman break; 338aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 339aa275fccSKris Buschelman a->nonew = 0; 340aa275fccSKris Buschelman break; 341aa275fccSKris Buschelman case MAT_ROWS_SORTED: 342aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 343aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 344aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 345aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 346b0a32e0cSBarry Smith PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347aa275fccSKris Buschelman break; 348aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 34929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350bd648c37SKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 351bd648c37SKris Buschelman if (a->bs==4) { 35254138f6bSKris Buschelman a->single_precision_solves = PETSC_TRUE; 35354138f6bSKris Buschelman A->ops->solve = MatSolve_SeqBAIJ_Update; 35454138f6bSKris Buschelman A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 355cf242676SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 35694ee7fc8SKris Buschelman } else { 35794ee7fc8SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 3588661488fSKris Buschelman } 359bd648c37SKris Buschelman break; 360aa275fccSKris Buschelman default: 36129bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 3622d61bbb3SSatish Balay } 3632d61bbb3SSatish Balay PetscFunctionReturn(0); 3642d61bbb3SSatish Balay } 3652d61bbb3SSatish Balay 3664a2ae208SSatish Balay #undef __FUNCT__ 3674a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 36887828ca2SBarry Smith int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 3692d61bbb3SSatish Balay { 3702d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 37182502324SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 3723f1db9ecSBarry Smith MatScalar *aa,*aa_i; 37387828ca2SBarry Smith PetscScalar *v_i; 3742d61bbb3SSatish Balay 3752d61bbb3SSatish Balay PetscFunctionBegin; 3762d61bbb3SSatish Balay bs = a->bs; 3772d61bbb3SSatish Balay ai = a->i; 3782d61bbb3SSatish Balay aj = a->j; 3792d61bbb3SSatish Balay aa = a->a; 3802d61bbb3SSatish Balay bs2 = a->bs2; 3812d61bbb3SSatish Balay 382273d9f13SBarry Smith if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 3832d61bbb3SSatish Balay 3842d61bbb3SSatish Balay bn = row/bs; /* Block number */ 3852d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 3862d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 3872d61bbb3SSatish Balay *nz = bs*M; 3882d61bbb3SSatish Balay 3892d61bbb3SSatish Balay if (v) { 3902d61bbb3SSatish Balay *v = 0; 3912d61bbb3SSatish Balay if (*nz) { 39287828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 3932d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 3942d61bbb3SSatish Balay v_i = *v + i*bs; 3952d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 3962d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 3972d61bbb3SSatish Balay } 3982d61bbb3SSatish Balay } 3992d61bbb3SSatish Balay } 4002d61bbb3SSatish Balay 4012d61bbb3SSatish Balay if (idx) { 4022d61bbb3SSatish Balay *idx = 0; 4032d61bbb3SSatish Balay if (*nz) { 404b0a32e0cSBarry Smith ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 4052d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4062d61bbb3SSatish Balay idx_i = *idx + i*bs; 4072d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 4082d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 4092d61bbb3SSatish Balay } 4102d61bbb3SSatish Balay } 4112d61bbb3SSatish Balay } 4122d61bbb3SSatish Balay PetscFunctionReturn(0); 4132d61bbb3SSatish Balay } 4142d61bbb3SSatish Balay 4154a2ae208SSatish Balay #undef __FUNCT__ 4164a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 41787828ca2SBarry Smith int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 4182d61bbb3SSatish Balay { 419606d414cSSatish Balay int ierr; 420606d414cSSatish Balay 4212d61bbb3SSatish Balay PetscFunctionBegin; 422606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 423606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 4242d61bbb3SSatish Balay PetscFunctionReturn(0); 4252d61bbb3SSatish Balay } 4262d61bbb3SSatish Balay 4274a2ae208SSatish Balay #undef __FUNCT__ 4284a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 4292d61bbb3SSatish Balay int MatTranspose_SeqBAIJ(Mat A,Mat *B) 4302d61bbb3SSatish Balay { 4312d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4322d61bbb3SSatish Balay Mat C; 4332d61bbb3SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 434273d9f13SBarry Smith int *rows,*cols,bs2=a->bs2; 43587828ca2SBarry Smith PetscScalar *array; 4362d61bbb3SSatish Balay 4372d61bbb3SSatish Balay PetscFunctionBegin; 43829bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 439b0a32e0cSBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 440549d3d68SSatish Balay ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 4412d61bbb3SSatish Balay 442375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 44387828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 44487828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 445375fe846SBarry Smith #else 4463eda8832SBarry Smith array = a->a; 447375fe846SBarry Smith #endif 448375fe846SBarry Smith 4492d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 450273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 451606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 452b0a32e0cSBarry Smith ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 4532d61bbb3SSatish Balay cols = rows + bs; 4542d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4552d61bbb3SSatish Balay cols[0] = i*bs; 4562d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 4572d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 4582d61bbb3SSatish Balay for (j=0; j<len; j++) { 4592d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 4602d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 4612d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 4622d61bbb3SSatish Balay array += bs2; 4632d61bbb3SSatish Balay } 4642d61bbb3SSatish Balay } 465606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 466375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 467375fe846SBarry Smith ierr = PetscFree(array); 468375fe846SBarry Smith #endif 4692d61bbb3SSatish Balay 4702d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4712d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4722d61bbb3SSatish Balay 473c4992f7dSBarry Smith if (B) { 4742d61bbb3SSatish Balay *B = C; 4752d61bbb3SSatish Balay } else { 476273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 4772d61bbb3SSatish Balay } 4782d61bbb3SSatish Balay PetscFunctionReturn(0); 4792d61bbb3SSatish Balay } 4802d61bbb3SSatish Balay 4814a2ae208SSatish Balay #undef __FUNCT__ 4824a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 483b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 4842593348eSBarry Smith { 485b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 4869df24120SSatish Balay int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 48787828ca2SBarry Smith PetscScalar *aa; 488ce6f0cecSBarry Smith FILE *file; 4892593348eSBarry Smith 4903a40ed3dSBarry Smith PetscFunctionBegin; 491b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 492b0a32e0cSBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 493552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 4943b2fbd54SBarry Smith 495273d9f13SBarry Smith col_lens[1] = A->m; 496273d9f13SBarry Smith col_lens[2] = A->n; 4977e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 4982593348eSBarry Smith 4992593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 500b6490206SBarry Smith count = 0; 501b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 502b6490206SBarry Smith for (j=0; j<bs; j++) { 503b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 504b6490206SBarry Smith } 5052593348eSBarry Smith } 506273d9f13SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 507606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 5082593348eSBarry Smith 5092593348eSBarry Smith /* store column indices (zero start index) */ 510b0a32e0cSBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 511b6490206SBarry Smith count = 0; 512b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 513b6490206SBarry Smith for (j=0; j<bs; j++) { 514b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 515b6490206SBarry Smith for (l=0; l<bs; l++) { 516b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 5172593348eSBarry Smith } 5182593348eSBarry Smith } 519b6490206SBarry Smith } 520b6490206SBarry Smith } 5210752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 522606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 5232593348eSBarry Smith 5242593348eSBarry Smith /* store nonzero values */ 52587828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 526b6490206SBarry Smith count = 0; 527b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 528b6490206SBarry Smith for (j=0; j<bs; j++) { 529b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 530b6490206SBarry Smith for (l=0; l<bs; l++) { 5317e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 532b6490206SBarry Smith } 533b6490206SBarry Smith } 534b6490206SBarry Smith } 535b6490206SBarry Smith } 5360752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 537606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 538ce6f0cecSBarry Smith 539b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 540ce6f0cecSBarry Smith if (file) { 541ce6f0cecSBarry Smith fprintf(file,"-matload_block_size %d\n",a->bs); 542ce6f0cecSBarry Smith } 5433a40ed3dSBarry Smith PetscFunctionReturn(0); 5442593348eSBarry Smith } 5452593348eSBarry Smith 54604929863SHong Zhang extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 54704929863SHong Zhang 5484a2ae208SSatish Balay #undef __FUNCT__ 5494a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 550b0a32e0cSBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 5512593348eSBarry Smith { 552b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 553fb9695e5SSatish Balay int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 554f3ef73ceSBarry Smith PetscViewerFormat format; 5552593348eSBarry Smith 5563a40ed3dSBarry Smith PetscFunctionBegin; 557b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 558456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 559b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 560fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 561bcd9e38bSBarry Smith Mat aij; 562bcd9e38bSBarry Smith ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 563bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 564bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 56504929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 56604929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 56704929863SHong Zhang ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 56804929863SHong Zhang #endif 56904929863SHong Zhang PetscFunctionReturn(0); 570fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 571b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 57244cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 57344cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 574b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 57544cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 57644cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 577aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 5780e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 57962b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 5800e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5810e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 58262b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 5830e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5840e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 58562b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5860ef38995SBarry Smith } 58744cd7ae7SLois Curfman McInnes #else 5880ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 58962b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 5900ef38995SBarry Smith } 59144cd7ae7SLois Curfman McInnes #endif 59244cd7ae7SLois Curfman McInnes } 59344cd7ae7SLois Curfman McInnes } 594b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 59544cd7ae7SLois Curfman McInnes } 59644cd7ae7SLois Curfman McInnes } 597b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 5980ef38995SBarry Smith } else { 599b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 600b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 601b6490206SBarry Smith for (j=0; j<bs; j++) { 602b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 603b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 604b6490206SBarry Smith for (l=0; l<bs; l++) { 605aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 6060e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 60762b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 6080e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6090e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 61062b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 6110e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6120ef38995SBarry Smith } else { 61362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 61488685aaeSLois Curfman McInnes } 61588685aaeSLois Curfman McInnes #else 61662b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 61788685aaeSLois Curfman McInnes #endif 6182593348eSBarry Smith } 6192593348eSBarry Smith } 620b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 6212593348eSBarry Smith } 6222593348eSBarry Smith } 623b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 624b6490206SBarry Smith } 625b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6263a40ed3dSBarry Smith PetscFunctionReturn(0); 6272593348eSBarry Smith } 6282593348eSBarry Smith 6294a2ae208SSatish Balay #undef __FUNCT__ 6304a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 631b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 6323270192aSSatish Balay { 63377ed5343SBarry Smith Mat A = (Mat) Aa; 6343270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 635b65db4caSBarry Smith int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 6360e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 6373f1db9ecSBarry Smith MatScalar *aa; 638b0a32e0cSBarry Smith PetscViewer viewer; 6393270192aSSatish Balay 6403a40ed3dSBarry Smith PetscFunctionBegin; 6413270192aSSatish Balay 642b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 64377ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 64477ed5343SBarry Smith 645b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 64677ed5343SBarry Smith 6473270192aSSatish Balay /* loop over matrix elements drawing boxes */ 648b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 6493270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6503270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 651273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6523270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6533270192aSSatish Balay aa = a->a + j*bs2; 6543270192aSSatish Balay for (k=0; k<bs; k++) { 6553270192aSSatish Balay for (l=0; l<bs; l++) { 6560e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 657b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6583270192aSSatish Balay } 6593270192aSSatish Balay } 6603270192aSSatish Balay } 6613270192aSSatish Balay } 662b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 6633270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6643270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 665273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6663270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6673270192aSSatish Balay aa = a->a + j*bs2; 6683270192aSSatish Balay for (k=0; k<bs; k++) { 6693270192aSSatish Balay for (l=0; l<bs; l++) { 6700e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 671b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6723270192aSSatish Balay } 6733270192aSSatish Balay } 6743270192aSSatish Balay } 6753270192aSSatish Balay } 6763270192aSSatish Balay 677b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 6783270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6793270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 680273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6813270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6823270192aSSatish Balay aa = a->a + j*bs2; 6833270192aSSatish Balay for (k=0; k<bs; k++) { 6843270192aSSatish Balay for (l=0; l<bs; l++) { 6850e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 686b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6873270192aSSatish Balay } 6883270192aSSatish Balay } 6893270192aSSatish Balay } 6903270192aSSatish Balay } 69177ed5343SBarry Smith PetscFunctionReturn(0); 69277ed5343SBarry Smith } 6933270192aSSatish Balay 6944a2ae208SSatish Balay #undef __FUNCT__ 6954a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 696b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 69777ed5343SBarry Smith { 6987dce120fSSatish Balay int ierr; 6990e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 700b0a32e0cSBarry Smith PetscDraw draw; 70177ed5343SBarry Smith PetscTruth isnull; 7023270192aSSatish Balay 70377ed5343SBarry Smith PetscFunctionBegin; 70477ed5343SBarry Smith 705b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 706b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 70777ed5343SBarry Smith 70877ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 709273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 71077ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 711b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 712b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 71377ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 7143a40ed3dSBarry Smith PetscFunctionReturn(0); 7153270192aSSatish Balay } 7163270192aSSatish Balay 7174a2ae208SSatish Balay #undef __FUNCT__ 7184a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 719b0a32e0cSBarry Smith int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 7202593348eSBarry Smith { 72119bcc07fSBarry Smith int ierr; 7226831982aSBarry Smith PetscTruth issocket,isascii,isbinary,isdraw; 7232593348eSBarry Smith 7243a40ed3dSBarry Smith PetscFunctionBegin; 725b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 726b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 727fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 728fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 7290f5bd95cSBarry Smith if (issocket) { 73029bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 7310f5bd95cSBarry Smith } else if (isascii){ 7323a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 7330f5bd95cSBarry Smith } else if (isbinary) { 7343a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 7350f5bd95cSBarry Smith } else if (isdraw) { 7363a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 7375cd90555SBarry Smith } else { 73829bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 7392593348eSBarry Smith } 7403a40ed3dSBarry Smith PetscFunctionReturn(0); 7412593348eSBarry Smith } 742b6490206SBarry Smith 743cd0e1443SSatish Balay 7444a2ae208SSatish Balay #undef __FUNCT__ 7454a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 74687828ca2SBarry Smith int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 747cd0e1443SSatish Balay { 748cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7492d61bbb3SSatish Balay int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 7502d61bbb3SSatish Balay int *ai = a->i,*ailen = a->ilen; 7512d61bbb3SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 7523f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 753cd0e1443SSatish Balay 7543a40ed3dSBarry Smith PetscFunctionBegin; 7552d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 756cd0e1443SSatish Balay row = im[k]; brow = row/bs; 75729bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 758273d9f13SBarry Smith if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 7592d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 7602c3acbe9SBarry Smith nrow = ailen[brow]; 7612d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 76229bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 763273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 7642d61bbb3SSatish Balay col = in[l] ; 7652d61bbb3SSatish Balay bcol = col/bs; 7662d61bbb3SSatish Balay cidx = col%bs; 7672d61bbb3SSatish Balay ridx = row%bs; 7682d61bbb3SSatish Balay high = nrow; 7692d61bbb3SSatish Balay low = 0; /* assume unsorted */ 7702d61bbb3SSatish Balay while (high-low > 5) { 771cd0e1443SSatish Balay t = (low+high)/2; 772cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 773cd0e1443SSatish Balay else low = t; 774cd0e1443SSatish Balay } 775cd0e1443SSatish Balay for (i=low; i<high; i++) { 776cd0e1443SSatish Balay if (rp[i] > bcol) break; 777cd0e1443SSatish Balay if (rp[i] == bcol) { 7782d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 7792d61bbb3SSatish Balay goto finished; 780cd0e1443SSatish Balay } 781cd0e1443SSatish Balay } 7822d61bbb3SSatish Balay *v++ = zero; 7832d61bbb3SSatish Balay finished:; 784cd0e1443SSatish Balay } 785cd0e1443SSatish Balay } 7863a40ed3dSBarry Smith PetscFunctionReturn(0); 787cd0e1443SSatish Balay } 788cd0e1443SSatish Balay 78995d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 7904a2ae208SSatish Balay #undef __FUNCT__ 7914a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 79287828ca2SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 79395d5f7c2SBarry Smith { 794563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 795563b5814SBarry Smith int ierr,i,N = m*n*b->bs2; 796563b5814SBarry Smith MatScalar *vsingle; 79795d5f7c2SBarry Smith 79895d5f7c2SBarry Smith PetscFunctionBegin; 799563b5814SBarry Smith if (N > b->setvalueslen) { 800563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 801b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 802563b5814SBarry Smith b->setvalueslen = N; 803563b5814SBarry Smith } 804563b5814SBarry Smith vsingle = b->setvaluescopy; 80595d5f7c2SBarry Smith for (i=0; i<N; i++) { 80695d5f7c2SBarry Smith vsingle[i] = v[i]; 80795d5f7c2SBarry Smith } 80895d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 80995d5f7c2SBarry Smith PetscFunctionReturn(0); 81095d5f7c2SBarry Smith } 81195d5f7c2SBarry Smith #endif 81295d5f7c2SBarry Smith 8132d61bbb3SSatish Balay 8144a2ae208SSatish Balay #undef __FUNCT__ 8154a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 81695d5f7c2SBarry Smith int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 81792c4ed94SBarry Smith { 81892c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8198a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 820273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 821549d3d68SSatish Balay int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 822273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 82395d5f7c2SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 82492c4ed94SBarry Smith 8253a40ed3dSBarry Smith PetscFunctionBegin; 8260e324ae4SSatish Balay if (roworiented) { 8270e324ae4SSatish Balay stepval = (n-1)*bs; 8280e324ae4SSatish Balay } else { 8290e324ae4SSatish Balay stepval = (m-1)*bs; 8300e324ae4SSatish Balay } 83192c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 83292c4ed94SBarry Smith row = im[k]; 8335ef9f2a5SBarry Smith if (row < 0) continue; 834aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 83529bbc08cSBarry Smith if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 83692c4ed94SBarry Smith #endif 83792c4ed94SBarry Smith rp = aj + ai[row]; 83892c4ed94SBarry Smith ap = aa + bs2*ai[row]; 83992c4ed94SBarry Smith rmax = imax[row]; 84092c4ed94SBarry Smith nrow = ailen[row]; 84192c4ed94SBarry Smith low = 0; 84292c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 8435ef9f2a5SBarry Smith if (in[l] < 0) continue; 844aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 84529bbc08cSBarry Smith if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 84692c4ed94SBarry Smith #endif 84792c4ed94SBarry Smith col = in[l]; 84892c4ed94SBarry Smith if (roworiented) { 8490e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 8500e324ae4SSatish Balay } else { 8510e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 85292c4ed94SBarry Smith } 85392c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 85492c4ed94SBarry Smith while (high-low > 7) { 85592c4ed94SBarry Smith t = (low+high)/2; 85692c4ed94SBarry Smith if (rp[t] > col) high = t; 85792c4ed94SBarry Smith else low = t; 85892c4ed94SBarry Smith } 85992c4ed94SBarry Smith for (i=low; i<high; i++) { 86092c4ed94SBarry Smith if (rp[i] > col) break; 86192c4ed94SBarry Smith if (rp[i] == col) { 8628a84c255SSatish Balay bap = ap + bs2*i; 8630e324ae4SSatish Balay if (roworiented) { 8648a84c255SSatish Balay if (is == ADD_VALUES) { 865dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 866dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8678a84c255SSatish Balay bap[jj] += *value++; 868dd9472c6SBarry Smith } 869dd9472c6SBarry Smith } 8700e324ae4SSatish Balay } else { 871dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 872dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8730e324ae4SSatish Balay bap[jj] = *value++; 8748a84c255SSatish Balay } 875dd9472c6SBarry Smith } 876dd9472c6SBarry Smith } 8770e324ae4SSatish Balay } else { 8780e324ae4SSatish Balay if (is == ADD_VALUES) { 879dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 880dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8810e324ae4SSatish Balay *bap++ += *value++; 882dd9472c6SBarry Smith } 883dd9472c6SBarry Smith } 8840e324ae4SSatish Balay } else { 885dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 886dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8870e324ae4SSatish Balay *bap++ = *value++; 8880e324ae4SSatish Balay } 8898a84c255SSatish Balay } 890dd9472c6SBarry Smith } 891dd9472c6SBarry Smith } 892f1241b54SBarry Smith goto noinsert2; 89392c4ed94SBarry Smith } 89492c4ed94SBarry Smith } 89589280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 89629bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 89792c4ed94SBarry Smith if (nrow >= rmax) { 89892c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 89992c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 9003f1db9ecSBarry Smith MatScalar *new_a; 90192c4ed94SBarry Smith 90229bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 90389280ab3SLois Curfman McInnes 90492c4ed94SBarry Smith /* malloc new storage space */ 9053f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 906b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 90792c4ed94SBarry Smith new_j = (int*)(new_a + bs2*new_nz); 90892c4ed94SBarry Smith new_i = new_j + new_nz; 90992c4ed94SBarry Smith 91092c4ed94SBarry Smith /* copy over old data into new slots */ 91192c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 91292c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 913549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 91492c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 915549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 916549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 917549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 918549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 91992c4ed94SBarry Smith /* free up old matrix storage */ 920606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 921606d414cSSatish Balay if (!a->singlemalloc) { 922606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 923606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 924606d414cSSatish Balay } 92592c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 926c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 92792c4ed94SBarry Smith 92892c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 92992c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 930b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 93192c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 93292c4ed94SBarry Smith a->reallocs++; 93392c4ed94SBarry Smith a->nz++; 93492c4ed94SBarry Smith } 93592c4ed94SBarry Smith N = nrow++ - 1; 93692c4ed94SBarry Smith /* shift up all the later entries in this row */ 93792c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 93892c4ed94SBarry Smith rp[ii+1] = rp[ii]; 939549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 94092c4ed94SBarry Smith } 941549d3d68SSatish Balay if (N >= i) { 942549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 943549d3d68SSatish Balay } 94492c4ed94SBarry Smith rp[i] = col; 9458a84c255SSatish Balay bap = ap + bs2*i; 9460e324ae4SSatish Balay if (roworiented) { 947dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 948dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 9490e324ae4SSatish Balay bap[jj] = *value++; 950dd9472c6SBarry Smith } 951dd9472c6SBarry Smith } 9520e324ae4SSatish Balay } else { 953dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 954dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9550e324ae4SSatish Balay *bap++ = *value++; 9560e324ae4SSatish Balay } 957dd9472c6SBarry Smith } 958dd9472c6SBarry Smith } 959f1241b54SBarry Smith noinsert2:; 96092c4ed94SBarry Smith low = i; 96192c4ed94SBarry Smith } 96292c4ed94SBarry Smith ailen[row] = nrow; 96392c4ed94SBarry Smith } 9643a40ed3dSBarry Smith PetscFunctionReturn(0); 96592c4ed94SBarry Smith } 96692c4ed94SBarry Smith 9674a2ae208SSatish Balay #undef __FUNCT__ 9684a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 9698f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 970584200bdSSatish Balay { 971584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 972584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 973273d9f13SBarry Smith int m = A->m,*ip,N,*ailen = a->ilen; 974549d3d68SSatish Balay int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 9753f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 976a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 977a56a16c8SHong Zhang PetscTruth flag; 978a56a16c8SHong Zhang #endif 979584200bdSSatish Balay 9803a40ed3dSBarry Smith PetscFunctionBegin; 9813a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 982584200bdSSatish Balay 98343ee02c3SBarry Smith if (m) rmax = ailen[0]; 984584200bdSSatish Balay for (i=1; i<mbs; i++) { 985584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 986584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 987d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 988584200bdSSatish Balay if (fshift) { 989a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 990584200bdSSatish Balay N = ailen[i]; 991584200bdSSatish Balay for (j=0; j<N; j++) { 992584200bdSSatish Balay ip[j-fshift] = ip[j]; 993549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 994584200bdSSatish Balay } 995584200bdSSatish Balay } 996584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 997584200bdSSatish Balay } 998584200bdSSatish Balay if (mbs) { 999584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1000584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1001584200bdSSatish Balay } 1002584200bdSSatish Balay /* reset ilen and imax for each row */ 1003584200bdSSatish Balay for (i=0; i<mbs; i++) { 1004584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1005584200bdSSatish Balay } 1006a7c10996SSatish Balay a->nz = ai[mbs]; 1007584200bdSSatish Balay 1008584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1009584200bdSSatish Balay if (fshift && a->diag) { 1010606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 1011b424e231SHong Zhang PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1012584200bdSSatish Balay a->diag = 0; 1013584200bdSSatish Balay } 1014bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1015bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1016b0a32e0cSBarry Smith PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1017e2f3b5e9SSatish Balay a->reallocs = 0; 10180e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1019cf4441caSHong Zhang 1020a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 10212c535e4dSHong Zhang ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1022a56a16c8SHong Zhang if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1023a56a16c8SHong Zhang #endif 1024a56a16c8SHong Zhang 10253a40ed3dSBarry Smith PetscFunctionReturn(0); 1026584200bdSSatish Balay } 1027584200bdSSatish Balay 10285a838052SSatish Balay 1029bea157c4SSatish Balay 1030bea157c4SSatish Balay /* 1031bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1032bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1033bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1034bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1035bea157c4SSatish Balay */ 10364a2ae208SSatish Balay #undef __FUNCT__ 10374a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1038bea157c4SSatish Balay static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1039d9b7c43dSSatish Balay { 1040bea157c4SSatish Balay int i,j,k,row; 1041bea157c4SSatish Balay PetscTruth flg; 10423a40ed3dSBarry Smith 1043433994e6SBarry Smith PetscFunctionBegin; 1044bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1045bea157c4SSatish Balay row = idx[i]; 1046bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1047bea157c4SSatish Balay sizes[j] = 1; 1048bea157c4SSatish Balay i++; 1049e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1050bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1051bea157c4SSatish Balay i++; 1052bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1053bea157c4SSatish Balay flg = PETSC_TRUE; 1054bea157c4SSatish Balay for (k=1; k<bs; k++) { 1055bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1056bea157c4SSatish Balay flg = PETSC_FALSE; 1057bea157c4SSatish Balay break; 1058d9b7c43dSSatish Balay } 1059bea157c4SSatish Balay } 1060bea157c4SSatish Balay if (flg == PETSC_TRUE) { /* No break in the bs */ 1061bea157c4SSatish Balay sizes[j] = bs; 1062bea157c4SSatish Balay i+= bs; 1063bea157c4SSatish Balay } else { 1064bea157c4SSatish Balay sizes[j] = 1; 1065bea157c4SSatish Balay i++; 1066bea157c4SSatish Balay } 1067bea157c4SSatish Balay } 1068bea157c4SSatish Balay } 1069bea157c4SSatish Balay *bs_max = j; 10703a40ed3dSBarry Smith PetscFunctionReturn(0); 1071d9b7c43dSSatish Balay } 1072d9b7c43dSSatish Balay 10734a2ae208SSatish Balay #undef __FUNCT__ 10744a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 107587828ca2SBarry Smith int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1076d9b7c43dSSatish Balay { 1077d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1078b6815fffSBarry Smith int ierr,i,j,k,count,is_n,*is_idx,*rows; 1079bea157c4SSatish Balay int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 108087828ca2SBarry Smith PetscScalar zero = 0.0; 10813f1db9ecSBarry Smith MatScalar *aa; 1082d9b7c43dSSatish Balay 10833a40ed3dSBarry Smith PetscFunctionBegin; 1084d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1085b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1086d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1087d9b7c43dSSatish Balay 1088bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1089b0a32e0cSBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1090bea157c4SSatish Balay sizes = rows + is_n; 1091bea157c4SSatish Balay 1092563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1093bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1094bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1095dffd3267SBarry Smith if (baij->keepzeroedrows) { 1096dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1097dffd3267SBarry Smith bs_max = is_n; 1098dffd3267SBarry Smith } else { 1099bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1100dffd3267SBarry Smith } 1101b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1102bea157c4SSatish Balay 1103bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1104bea157c4SSatish Balay row = rows[j]; 1105273d9f13SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1106bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1107bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1108dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1109bea157c4SSatish Balay if (diag) { 1110bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1111bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1112bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1113bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1114a07cd24cSSatish Balay } 1115563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1116bea157c4SSatish Balay for (k=0; k<bs; k++) { 1117bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1118bea157c4SSatish Balay } 1119bea157c4SSatish Balay } else { /* (!diag) */ 1120bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1121bea157c4SSatish Balay } /* end (!diag) */ 1122bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1123aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 112429bbc08cSBarry Smith if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1125bea157c4SSatish Balay #endif 1126bea157c4SSatish Balay for (k=0; k<count; k++) { 1127d9b7c43dSSatish Balay aa[0] = zero; 1128d9b7c43dSSatish Balay aa += bs; 1129d9b7c43dSSatish Balay } 1130d9b7c43dSSatish Balay if (diag) { 1131f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1132d9b7c43dSSatish Balay } 1133d9b7c43dSSatish Balay } 1134bea157c4SSatish Balay } 1135bea157c4SSatish Balay 1136606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 11379a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11383a40ed3dSBarry Smith PetscFunctionReturn(0); 1139d9b7c43dSSatish Balay } 11401c351548SSatish Balay 11414a2ae208SSatish Balay #undef __FUNCT__ 11424a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 114387828ca2SBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 11442d61bbb3SSatish Balay { 11452d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11462d61bbb3SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1147273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 11482d61bbb3SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1149549d3d68SSatish Balay int ridx,cidx,bs2=a->bs2,ierr; 1150273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 11513f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 11522d61bbb3SSatish Balay 11532d61bbb3SSatish Balay PetscFunctionBegin; 11542d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 11552d61bbb3SSatish Balay row = im[k]; brow = row/bs; 11565ef9f2a5SBarry Smith if (row < 0) continue; 1157aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1158273d9f13SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 11592d61bbb3SSatish Balay #endif 11602d61bbb3SSatish Balay rp = aj + ai[brow]; 11612d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 11622d61bbb3SSatish Balay rmax = imax[brow]; 11632d61bbb3SSatish Balay nrow = ailen[brow]; 11642d61bbb3SSatish Balay low = 0; 11652d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 11665ef9f2a5SBarry Smith if (in[l] < 0) continue; 1167aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1168273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 11692d61bbb3SSatish Balay #endif 11702d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 11712d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 11722d61bbb3SSatish Balay if (roworiented) { 11735ef9f2a5SBarry Smith value = v[l + k*n]; 11742d61bbb3SSatish Balay } else { 11752d61bbb3SSatish Balay value = v[k + l*m]; 11762d61bbb3SSatish Balay } 11772d61bbb3SSatish Balay if (!sorted) low = 0; high = nrow; 11782d61bbb3SSatish Balay while (high-low > 7) { 11792d61bbb3SSatish Balay t = (low+high)/2; 11802d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 11812d61bbb3SSatish Balay else low = t; 11822d61bbb3SSatish Balay } 11832d61bbb3SSatish Balay for (i=low; i<high; i++) { 11842d61bbb3SSatish Balay if (rp[i] > bcol) break; 11852d61bbb3SSatish Balay if (rp[i] == bcol) { 11862d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 11872d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 11882d61bbb3SSatish Balay else *bap = value; 11892d61bbb3SSatish Balay goto noinsert1; 11902d61bbb3SSatish Balay } 11912d61bbb3SSatish Balay } 11922d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 119329bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 11942d61bbb3SSatish Balay if (nrow >= rmax) { 11952d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 11962d61bbb3SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 11973f1db9ecSBarry Smith MatScalar *new_a; 11982d61bbb3SSatish Balay 119929bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12002d61bbb3SSatish Balay 12012d61bbb3SSatish Balay /* Malloc new storage space */ 12023f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1203b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 12042d61bbb3SSatish Balay new_j = (int*)(new_a + bs2*new_nz); 12052d61bbb3SSatish Balay new_i = new_j + new_nz; 12062d61bbb3SSatish Balay 12072d61bbb3SSatish Balay /* copy over old data into new slots */ 12082d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 12092d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1210549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 12112d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1212549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1213549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1214549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1215549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 12162d61bbb3SSatish Balay /* free up old matrix storage */ 1217606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1218606d414cSSatish Balay if (!a->singlemalloc) { 1219606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1220606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1221606d414cSSatish Balay } 12222d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1223c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 12242d61bbb3SSatish Balay 12252d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 12262d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1227b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 12282d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 12292d61bbb3SSatish Balay a->reallocs++; 12302d61bbb3SSatish Balay a->nz++; 12312d61bbb3SSatish Balay } 12322d61bbb3SSatish Balay N = nrow++ - 1; 12332d61bbb3SSatish Balay /* shift up all the later entries in this row */ 12342d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 12352d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1236549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 12372d61bbb3SSatish Balay } 1238549d3d68SSatish Balay if (N>=i) { 1239549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1240549d3d68SSatish Balay } 12412d61bbb3SSatish Balay rp[i] = bcol; 12422d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 12432d61bbb3SSatish Balay noinsert1:; 12442d61bbb3SSatish Balay low = i; 12452d61bbb3SSatish Balay } 12462d61bbb3SSatish Balay ailen[brow] = nrow; 12472d61bbb3SSatish Balay } 12482d61bbb3SSatish Balay PetscFunctionReturn(0); 12492d61bbb3SSatish Balay } 12502d61bbb3SSatish Balay 12512d61bbb3SSatish Balay 12524a2ae208SSatish Balay #undef __FUNCT__ 12534a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 12545ef9f2a5SBarry Smith int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 12552d61bbb3SSatish Balay { 12562d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 12572d61bbb3SSatish Balay Mat outA; 12582d61bbb3SSatish Balay int ierr; 1259667159a5SBarry Smith PetscTruth row_identity,col_identity; 12602d61bbb3SSatish Balay 12612d61bbb3SSatish Balay PetscFunctionBegin; 1262d3d32019SBarry Smith if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1263667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1264667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1265667159a5SBarry Smith if (!row_identity || !col_identity) { 126629bbc08cSBarry Smith SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1267667159a5SBarry Smith } 12682d61bbb3SSatish Balay 12692d61bbb3SSatish Balay outA = inA; 12702d61bbb3SSatish Balay inA->factor = FACTOR_LU; 12712d61bbb3SSatish Balay 12722d61bbb3SSatish Balay if (!a->diag) { 1273c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 12742d61bbb3SSatish Balay } 1275cf242676SKris Buschelman 1276c38d4ed2SBarry Smith a->row = row; 1277c38d4ed2SBarry Smith a->col = col; 1278c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1279c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1280c38d4ed2SBarry Smith 1281c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 12824c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1283b0a32e0cSBarry Smith PetscLogObjectParent(inA,a->icol); 1284c38d4ed2SBarry Smith 1285cf242676SKris Buschelman /* 1286cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1287cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1288cf242676SKris Buschelman */ 1289cf242676SKris Buschelman if (a->bs < 8) { 1290cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1291cf242676SKris Buschelman } else { 1292c38d4ed2SBarry Smith if (!a->solve_work) { 129387828ca2SBarry Smith ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 129487828ca2SBarry Smith PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1295c38d4ed2SBarry Smith } 12962d61bbb3SSatish Balay } 12972d61bbb3SSatish Balay 1298667159a5SBarry Smith ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1299667159a5SBarry Smith 13002d61bbb3SSatish Balay PetscFunctionReturn(0); 13012d61bbb3SSatish Balay } 13024a2ae208SSatish Balay #undef __FUNCT__ 13034a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1304ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1305ba4ca20aSSatish Balay { 1306c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1307ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1308d132466eSBarry Smith int ierr; 1309ba4ca20aSSatish Balay 13103a40ed3dSBarry Smith PetscFunctionBegin; 1311c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1312d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1313d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 13143a40ed3dSBarry Smith PetscFunctionReturn(0); 1315ba4ca20aSSatish Balay } 1316d9b7c43dSSatish Balay 1317fb2e594dSBarry Smith EXTERN_C_BEGIN 13184a2ae208SSatish Balay #undef __FUNCT__ 13194a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 132027a8da17SBarry Smith int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 132127a8da17SBarry Smith { 132227a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 132314db4f74SSatish Balay int i,nz,nbs; 132427a8da17SBarry Smith 132527a8da17SBarry Smith PetscFunctionBegin; 132614db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 132714db4f74SSatish Balay nbs = baij->nbs; 132827a8da17SBarry Smith for (i=0; i<nz; i++) { 132927a8da17SBarry Smith baij->j[i] = indices[i]; 133027a8da17SBarry Smith } 133127a8da17SBarry Smith baij->nz = nz; 133214db4f74SSatish Balay for (i=0; i<nbs; i++) { 133327a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 133427a8da17SBarry Smith } 133527a8da17SBarry Smith 133627a8da17SBarry Smith PetscFunctionReturn(0); 133727a8da17SBarry Smith } 1338fb2e594dSBarry Smith EXTERN_C_END 133927a8da17SBarry Smith 13404a2ae208SSatish Balay #undef __FUNCT__ 13414a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 134227a8da17SBarry Smith /*@ 134327a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 134427a8da17SBarry Smith in the matrix. 134527a8da17SBarry Smith 134627a8da17SBarry Smith Input Parameters: 134727a8da17SBarry Smith + mat - the SeqBAIJ matrix 134827a8da17SBarry Smith - indices - the column indices 134927a8da17SBarry Smith 135015091d37SBarry Smith Level: advanced 135115091d37SBarry Smith 135227a8da17SBarry Smith Notes: 135327a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 135427a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 135527a8da17SBarry Smith of the MatSetValues() operation. 135627a8da17SBarry Smith 135727a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 135827a8da17SBarry Smith MatCreateSeqBAIJ(). 135927a8da17SBarry Smith 136027a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 136127a8da17SBarry Smith 136227a8da17SBarry Smith @*/ 136327a8da17SBarry Smith int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 136427a8da17SBarry Smith { 136527a8da17SBarry Smith int ierr,(*f)(Mat,int *); 136627a8da17SBarry Smith 136727a8da17SBarry Smith PetscFunctionBegin; 136827a8da17SBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE); 1369c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 137027a8da17SBarry Smith if (f) { 137127a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 137227a8da17SBarry Smith } else { 137329bbc08cSBarry Smith SETERRQ(1,"Wrong type of matrix to set column indices"); 137427a8da17SBarry Smith } 137527a8da17SBarry Smith PetscFunctionReturn(0); 137627a8da17SBarry Smith } 137727a8da17SBarry Smith 13784a2ae208SSatish Balay #undef __FUNCT__ 13794a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1380273d9f13SBarry Smith int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1381273d9f13SBarry Smith { 1382273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1383273d9f13SBarry Smith int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1384273d9f13SBarry Smith PetscReal atmp; 138587828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1386273d9f13SBarry Smith MatScalar *aa; 1387273d9f13SBarry Smith int ncols,brow,krow,kcol; 1388273d9f13SBarry Smith 1389273d9f13SBarry Smith PetscFunctionBegin; 1390273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1391273d9f13SBarry Smith bs = a->bs; 1392273d9f13SBarry Smith aa = a->a; 1393273d9f13SBarry Smith ai = a->i; 1394273d9f13SBarry Smith aj = a->j; 1395273d9f13SBarry Smith mbs = a->mbs; 1396273d9f13SBarry Smith 1397273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 1398273d9f13SBarry Smith ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1399273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1400273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1401273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1402273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1403273d9f13SBarry Smith brow = bs*i; 1404273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1405273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1406273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1407273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1408273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1409273d9f13SBarry Smith row = brow + krow; /* row index */ 1410273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1411273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1412273d9f13SBarry Smith } 1413273d9f13SBarry Smith } 1414273d9f13SBarry Smith aj++; 1415273d9f13SBarry Smith } 1416273d9f13SBarry Smith } 1417273d9f13SBarry Smith ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1418273d9f13SBarry Smith PetscFunctionReturn(0); 1419273d9f13SBarry Smith } 1420273d9f13SBarry Smith 14214a2ae208SSatish Balay #undef __FUNCT__ 14224a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1423273d9f13SBarry Smith int MatSetUpPreallocation_SeqBAIJ(Mat A) 1424273d9f13SBarry Smith { 1425273d9f13SBarry Smith int ierr; 1426273d9f13SBarry Smith 1427273d9f13SBarry Smith PetscFunctionBegin; 1428273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1429273d9f13SBarry Smith PetscFunctionReturn(0); 1430273d9f13SBarry Smith } 1431273d9f13SBarry Smith 14324a2ae208SSatish Balay #undef __FUNCT__ 14334a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 143487828ca2SBarry Smith int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1435f2a5309cSSatish Balay { 1436f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1437f2a5309cSSatish Balay PetscFunctionBegin; 1438f2a5309cSSatish Balay *array = a->a; 1439f2a5309cSSatish Balay PetscFunctionReturn(0); 1440f2a5309cSSatish Balay } 1441f2a5309cSSatish Balay 14424a2ae208SSatish Balay #undef __FUNCT__ 14434a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 144487828ca2SBarry Smith int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1445f2a5309cSSatish Balay { 1446f2a5309cSSatish Balay PetscFunctionBegin; 1447f2a5309cSSatish Balay PetscFunctionReturn(0); 1448f2a5309cSSatish Balay } 1449f2a5309cSSatish Balay 145042ee4b1aSHong Zhang #include "petscblaslapack.h" 145142ee4b1aSHong Zhang #undef __FUNCT__ 145242ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 145342ee4b1aSHong Zhang int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 145442ee4b1aSHong Zhang { 145542ee4b1aSHong Zhang int ierr,one=1; 145642ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 145742ee4b1aSHong Zhang 145842ee4b1aSHong Zhang PetscFunctionBegin; 145942ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 146042ee4b1aSHong Zhang BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 146142ee4b1aSHong Zhang } else { 146242ee4b1aSHong Zhang ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 146342ee4b1aSHong Zhang } 146442ee4b1aSHong Zhang PetscFunctionReturn(0); 146542ee4b1aSHong Zhang } 146642ee4b1aSHong Zhang 14672593348eSBarry Smith /* -------------------------------------------------------------------*/ 1468cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1469cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 1470cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 1471cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 1472cc2dc46cSBarry Smith MatMultAdd_SeqBAIJ_N, 14737c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 14747c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 1475cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 1476cc2dc46cSBarry Smith 0, 1477cc2dc46cSBarry Smith 0, 1478cc2dc46cSBarry Smith 0, 1479cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 1480cc2dc46cSBarry Smith 0, 1481b6490206SBarry Smith 0, 1482f2501298SSatish Balay MatTranspose_SeqBAIJ, 1483cc2dc46cSBarry Smith MatGetInfo_SeqBAIJ, 1484cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 1485cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 1486cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 1487cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 1488b6490206SBarry Smith 0, 1489cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 1490cc2dc46cSBarry Smith 0, 1491cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 1492cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 1493cc2dc46cSBarry Smith MatZeroRows_SeqBAIJ, 1494cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 1495cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 1496cc2dc46cSBarry Smith 0, 1497cc2dc46cSBarry Smith 0, 1498273d9f13SBarry Smith MatSetUpPreallocation_SeqBAIJ, 1499cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 1500cc2dc46cSBarry Smith 0, 1501f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 1502f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 15032e8a6d31SBarry Smith MatDuplicate_SeqBAIJ, 1504cc2dc46cSBarry Smith 0, 1505cc2dc46cSBarry Smith 0, 1506cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 1507cc2dc46cSBarry Smith 0, 150842ee4b1aSHong Zhang MatAXPY_SeqBAIJ, 1509cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 1510cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 1511cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 1512cc2dc46cSBarry Smith 0, 1513cc2dc46cSBarry Smith MatPrintHelp_SeqBAIJ, 1514cc2dc46cSBarry Smith MatScale_SeqBAIJ, 1515cc2dc46cSBarry Smith 0, 1516cc2dc46cSBarry Smith 0, 1517cc2dc46cSBarry Smith 0, 1518cc2dc46cSBarry Smith MatGetBlockSize_SeqBAIJ, 15193b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 152092c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 1521cc2dc46cSBarry Smith 0, 1522cc2dc46cSBarry Smith 0, 1523cc2dc46cSBarry Smith 0, 1524cc2dc46cSBarry Smith 0, 1525cc2dc46cSBarry Smith 0, 1526cc2dc46cSBarry Smith 0, 1527d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 1528cc2dc46cSBarry Smith MatGetSubMatrix_SeqBAIJ, 1529b9b97703SBarry Smith MatDestroy_SeqBAIJ, 1530b9b97703SBarry Smith MatView_SeqBAIJ, 15313a64cc32SBarry Smith MatGetPetscMaps_Petsc, 1532273d9f13SBarry Smith 0, 1533273d9f13SBarry Smith 0, 1534273d9f13SBarry Smith 0, 1535273d9f13SBarry Smith 0, 1536273d9f13SBarry Smith 0, 1537273d9f13SBarry Smith 0, 1538273d9f13SBarry Smith MatGetRowMax_SeqBAIJ, 1539273d9f13SBarry Smith MatConvert_Basic}; 15402593348eSBarry Smith 15413e90b805SBarry Smith EXTERN_C_BEGIN 15424a2ae208SSatish Balay #undef __FUNCT__ 15434a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 15443e90b805SBarry Smith int MatStoreValues_SeqBAIJ(Mat mat) 15453e90b805SBarry Smith { 15463e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1547273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1548d132466eSBarry Smith int ierr; 15493e90b805SBarry Smith 15503e90b805SBarry Smith PetscFunctionBegin; 15513e90b805SBarry Smith if (aij->nonew != 1) { 155229bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15533e90b805SBarry Smith } 15543e90b805SBarry Smith 15553e90b805SBarry Smith /* allocate space for values if not already there */ 15563e90b805SBarry Smith if (!aij->saved_values) { 155787828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 15583e90b805SBarry Smith } 15593e90b805SBarry Smith 15603e90b805SBarry Smith /* copy values over */ 156187828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15623e90b805SBarry Smith PetscFunctionReturn(0); 15633e90b805SBarry Smith } 15643e90b805SBarry Smith EXTERN_C_END 15653e90b805SBarry Smith 15663e90b805SBarry Smith EXTERN_C_BEGIN 15674a2ae208SSatish Balay #undef __FUNCT__ 15684a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 156932e87ba3SBarry Smith int MatRetrieveValues_SeqBAIJ(Mat mat) 15703e90b805SBarry Smith { 15713e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1572273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 15733e90b805SBarry Smith 15743e90b805SBarry Smith PetscFunctionBegin; 15753e90b805SBarry Smith if (aij->nonew != 1) { 157629bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15773e90b805SBarry Smith } 15783e90b805SBarry Smith if (!aij->saved_values) { 157929bbc08cSBarry Smith SETERRQ(1,"Must call MatStoreValues(A);first"); 15803e90b805SBarry Smith } 15813e90b805SBarry Smith 15823e90b805SBarry Smith /* copy values over */ 158387828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15843e90b805SBarry Smith PetscFunctionReturn(0); 15853e90b805SBarry Smith } 15863e90b805SBarry Smith EXTERN_C_END 15873e90b805SBarry Smith 1588273d9f13SBarry Smith EXTERN_C_BEGIN 1589273d9f13SBarry Smith extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1590273d9f13SBarry Smith EXTERN_C_END 1591273d9f13SBarry Smith 1592273d9f13SBarry Smith EXTERN_C_BEGIN 15934a2ae208SSatish Balay #undef __FUNCT__ 15944a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 1595273d9f13SBarry Smith int MatCreate_SeqBAIJ(Mat B) 15962593348eSBarry Smith { 1597273d9f13SBarry Smith int ierr,size; 1598b6490206SBarry Smith Mat_SeqBAIJ *b; 15993b2fbd54SBarry Smith 16003a40ed3dSBarry Smith PetscFunctionBegin; 1601273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 160229bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1603b6490206SBarry Smith 1604273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 1605273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 1606b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1607b0a32e0cSBarry Smith B->data = (void*)b; 1608549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1609549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 16102593348eSBarry Smith B->factor = 0; 16112593348eSBarry Smith B->lupivotthreshold = 1.0; 161290f02eecSBarry Smith B->mapping = 0; 16132593348eSBarry Smith b->row = 0; 16142593348eSBarry Smith b->col = 0; 1615e51c0b9cSSatish Balay b->icol = 0; 16162593348eSBarry Smith b->reallocs = 0; 16173e90b805SBarry Smith b->saved_values = 0; 1618cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1619563b5814SBarry Smith b->setvalueslen = 0; 1620563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 1621563b5814SBarry Smith #endif 16228661488fSKris Buschelman b->single_precision_solves = PETSC_FALSE; 16232593348eSBarry Smith 16243a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 16253a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1626a5ae1ecdSBarry Smith 1627c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 1628c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 16292593348eSBarry Smith b->nonew = 0; 16302593348eSBarry Smith b->diag = 0; 16312593348eSBarry Smith b->solve_work = 0; 1632de6a44a3SBarry Smith b->mult_work = 0; 16332a1b7f2aSHong Zhang B->spptr = 0; 16340e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 1635883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 16364e220ebcSLois Curfman McInnes 1637f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 16383e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 1639bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1640f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 16413e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 1642bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1643f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 164427a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1645bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1646*a6175056SHong Zhang ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C", 1647273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 1648273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 16493a40ed3dSBarry Smith PetscFunctionReturn(0); 16502593348eSBarry Smith } 1651273d9f13SBarry Smith EXTERN_C_END 16522593348eSBarry Smith 16534a2ae208SSatish Balay #undef __FUNCT__ 16544a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 16552e8a6d31SBarry Smith int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 16562593348eSBarry Smith { 16572593348eSBarry Smith Mat C; 1658b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 165927a8da17SBarry Smith int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1660de6a44a3SBarry Smith 16613a40ed3dSBarry Smith PetscFunctionBegin; 166229bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 16632593348eSBarry Smith 16642593348eSBarry Smith *B = 0; 1665273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1666273d9f13SBarry Smith ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1667273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 166844cd7ae7SLois Curfman McInnes 1669b6490206SBarry Smith c->bs = a->bs; 16709df24120SSatish Balay c->bs2 = a->bs2; 1671b6490206SBarry Smith c->mbs = a->mbs; 1672b6490206SBarry Smith c->nbs = a->nbs; 167335d8aa7fSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 16742593348eSBarry Smith 1675b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1676b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1677b6490206SBarry Smith for (i=0; i<mbs; i++) { 16782593348eSBarry Smith c->imax[i] = a->imax[i]; 16792593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16802593348eSBarry Smith } 16812593348eSBarry Smith 16822593348eSBarry Smith /* allocate the matrix space */ 1683c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 16843f1db9ecSBarry Smith len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1685b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 16867e67e3f9SSatish Balay c->j = (int*)(c->a + nz*bs2); 1687de6a44a3SBarry Smith c->i = c->j + nz; 1688549d3d68SSatish Balay ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1689b6490206SBarry Smith if (mbs > 0) { 1690549d3d68SSatish Balay ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 16912e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 1692549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16932e8a6d31SBarry Smith } else { 1694549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16952593348eSBarry Smith } 16962593348eSBarry Smith } 16972593348eSBarry Smith 1698b0a32e0cSBarry Smith PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 16992593348eSBarry Smith c->sorted = a->sorted; 17002593348eSBarry Smith c->roworiented = a->roworiented; 17012593348eSBarry Smith c->nonew = a->nonew; 17022593348eSBarry Smith 17032593348eSBarry Smith if (a->diag) { 1704b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1705b0a32e0cSBarry Smith PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1706b6490206SBarry Smith for (i=0; i<mbs; i++) { 17072593348eSBarry Smith c->diag[i] = a->diag[i]; 17082593348eSBarry Smith } 170998305bb5SBarry Smith } else c->diag = 0; 17102593348eSBarry Smith c->nz = a->nz; 17112593348eSBarry Smith c->maxnz = a->maxnz; 17122593348eSBarry Smith c->solve_work = 0; 17132a1b7f2aSHong Zhang C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17147fc0212eSBarry Smith c->mult_work = 0; 1715273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 1716273d9f13SBarry Smith C->assembled = PETSC_TRUE; 17172593348eSBarry Smith *B = C; 1718b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 17193a40ed3dSBarry Smith PetscFunctionReturn(0); 17202593348eSBarry Smith } 17212593348eSBarry Smith 1722273d9f13SBarry Smith EXTERN_C_BEGIN 17234a2ae208SSatish Balay #undef __FUNCT__ 17244a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 1725b0a32e0cSBarry Smith int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 17262593348eSBarry Smith { 1727b6490206SBarry Smith Mat_SeqBAIJ *a; 17282593348eSBarry Smith Mat B; 1729f1af5d2fSBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1730b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 173135aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1732a7c10996SSatish Balay int *masked,nmask,tmp,bs2,ishift; 173387828ca2SBarry Smith PetscScalar *aa; 173419bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 17352593348eSBarry Smith 17363a40ed3dSBarry Smith PetscFunctionBegin; 1737b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1738de6a44a3SBarry Smith bs2 = bs*bs; 1739b6490206SBarry Smith 1740d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 174129bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1742b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 17430752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1744552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 17452593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17462593348eSBarry Smith 1747d64ed03dSBarry Smith if (header[3] < 0) { 174829bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1749d64ed03dSBarry Smith } 1750d64ed03dSBarry Smith 175129bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 175235aab85fSBarry Smith 175335aab85fSBarry Smith /* 175435aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 175535aab85fSBarry Smith divisible by the blocksize 175635aab85fSBarry Smith */ 1757b6490206SBarry Smith mbs = M/bs; 175835aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 175935aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 176035aab85fSBarry Smith else mbs++; 176135aab85fSBarry Smith if (extra_rows) { 1762b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 176335aab85fSBarry Smith } 1764b6490206SBarry Smith 17652593348eSBarry Smith /* read in row lengths */ 1766b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 17670752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 176835aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 17692593348eSBarry Smith 1770b6490206SBarry Smith /* read in column indices */ 1771b0a32e0cSBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 17720752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 177335aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1774b6490206SBarry Smith 1775b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1776b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1777549d3d68SSatish Balay ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1778b0a32e0cSBarry Smith ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1779549d3d68SSatish Balay ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 178035aab85fSBarry Smith masked = mask + mbs; 1781b6490206SBarry Smith rowcount = 0; nzcount = 0; 1782b6490206SBarry Smith for (i=0; i<mbs; i++) { 178335aab85fSBarry Smith nmask = 0; 1784b6490206SBarry Smith for (j=0; j<bs; j++) { 1785b6490206SBarry Smith kmax = rowlengths[rowcount]; 1786b6490206SBarry Smith for (k=0; k<kmax; k++) { 178735aab85fSBarry Smith tmp = jj[nzcount++]/bs; 178835aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1789b6490206SBarry Smith } 1790b6490206SBarry Smith rowcount++; 1791b6490206SBarry Smith } 179235aab85fSBarry Smith browlengths[i] += nmask; 179335aab85fSBarry Smith /* zero out the mask elements we set */ 179435aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1795b6490206SBarry Smith } 1796b6490206SBarry Smith 17972593348eSBarry Smith /* create our matrix */ 17983eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 17992593348eSBarry Smith B = *A; 1800b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 18012593348eSBarry Smith 18022593348eSBarry Smith /* set matrix "i" values */ 1803de6a44a3SBarry Smith a->i[0] = 0; 1804b6490206SBarry Smith for (i=1; i<= mbs; i++) { 1805b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1806b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 18072593348eSBarry Smith } 1808b6490206SBarry Smith a->nz = 0; 1809b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 18102593348eSBarry Smith 1811b6490206SBarry Smith /* read in nonzero values */ 181287828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 18130752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 181435aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1815b6490206SBarry Smith 1816b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1817b6490206SBarry Smith nzcount = 0; jcount = 0; 1818b6490206SBarry Smith for (i=0; i<mbs; i++) { 1819b6490206SBarry Smith nzcountb = nzcount; 182035aab85fSBarry Smith nmask = 0; 1821b6490206SBarry Smith for (j=0; j<bs; j++) { 1822b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1823b6490206SBarry Smith for (k=0; k<kmax; k++) { 182435aab85fSBarry Smith tmp = jj[nzcount++]/bs; 182535aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1826b6490206SBarry Smith } 1827b6490206SBarry Smith } 1828de6a44a3SBarry Smith /* sort the masked values */ 1829433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1830de6a44a3SBarry Smith 1831b6490206SBarry Smith /* set "j" values into matrix */ 1832b6490206SBarry Smith maskcount = 1; 183335aab85fSBarry Smith for (j=0; j<nmask; j++) { 183435aab85fSBarry Smith a->j[jcount++] = masked[j]; 1835de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1836b6490206SBarry Smith } 1837b6490206SBarry Smith /* set "a" values into matrix */ 1838de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1839b6490206SBarry Smith for (j=0; j<bs; j++) { 1840b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1841b6490206SBarry Smith for (k=0; k<kmax; k++) { 1842de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1843de6a44a3SBarry Smith block = mask[tmp] - 1; 1844de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1845de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1846375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 1847b6490206SBarry Smith } 1848b6490206SBarry Smith } 184935aab85fSBarry Smith /* zero out the mask elements we set */ 185035aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1851b6490206SBarry Smith } 185229bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1853b6490206SBarry Smith 1854606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1855606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 1856606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1857606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 1858606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 1859b6490206SBarry Smith 1860b6490206SBarry Smith B->assembled = PETSC_TRUE; 1861de6a44a3SBarry Smith 18629c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 18633a40ed3dSBarry Smith PetscFunctionReturn(0); 18642593348eSBarry Smith } 1865273d9f13SBarry Smith EXTERN_C_END 18662593348eSBarry Smith 18674a2ae208SSatish Balay #undef __FUNCT__ 18684a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 1869273d9f13SBarry Smith /*@C 1870273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1871273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 1872273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1873273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1874273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 18752593348eSBarry Smith 1876273d9f13SBarry Smith Collective on MPI_Comm 1877273d9f13SBarry Smith 1878273d9f13SBarry Smith Input Parameters: 1879273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1880273d9f13SBarry Smith . bs - size of block 1881273d9f13SBarry Smith . m - number of rows 1882273d9f13SBarry Smith . n - number of columns 188335d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 188435d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 1885273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1886273d9f13SBarry Smith 1887273d9f13SBarry Smith Output Parameter: 1888273d9f13SBarry Smith . A - the matrix 1889273d9f13SBarry Smith 1890273d9f13SBarry Smith Options Database Keys: 1891273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1892273d9f13SBarry Smith block calculations (much slower) 1893273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1894273d9f13SBarry Smith 1895273d9f13SBarry Smith Level: intermediate 1896273d9f13SBarry Smith 1897273d9f13SBarry Smith Notes: 189835d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 189935d8aa7fSBarry Smith 1900273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1901273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1902273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1903273d9f13SBarry Smith 1904273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1905273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1906273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1907273d9f13SBarry Smith matrices. 1908273d9f13SBarry Smith 1909273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1910273d9f13SBarry Smith @*/ 1911273d9f13SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1912273d9f13SBarry Smith { 1913273d9f13SBarry Smith int ierr; 1914273d9f13SBarry Smith 1915273d9f13SBarry Smith PetscFunctionBegin; 1916273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1917273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1918273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1919273d9f13SBarry Smith PetscFunctionReturn(0); 1920273d9f13SBarry Smith } 1921273d9f13SBarry Smith 19224a2ae208SSatish Balay #undef __FUNCT__ 19234a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1924273d9f13SBarry Smith /*@C 1925273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1926273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 1927273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1928273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1929273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 1930273d9f13SBarry Smith 1931273d9f13SBarry Smith Collective on MPI_Comm 1932273d9f13SBarry Smith 1933273d9f13SBarry Smith Input Parameters: 1934273d9f13SBarry Smith + A - the matrix 1935273d9f13SBarry Smith . bs - size of block 1936273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1937273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 1938273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1939273d9f13SBarry Smith 1940273d9f13SBarry Smith Options Database Keys: 1941273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1942273d9f13SBarry Smith block calculations (much slower) 1943273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1944273d9f13SBarry Smith 1945273d9f13SBarry Smith Level: intermediate 1946273d9f13SBarry Smith 1947273d9f13SBarry Smith Notes: 1948273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1949273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1950273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1951273d9f13SBarry Smith 1952273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1953273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1954273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1955273d9f13SBarry Smith matrices. 1956273d9f13SBarry Smith 1957273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1958273d9f13SBarry Smith @*/ 1959273d9f13SBarry Smith int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1960273d9f13SBarry Smith { 1961273d9f13SBarry Smith Mat_SeqBAIJ *b; 1962273d9f13SBarry Smith int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1963273d9f13SBarry Smith PetscTruth flg; 1964273d9f13SBarry Smith 1965273d9f13SBarry Smith PetscFunctionBegin; 1966273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1967273d9f13SBarry Smith if (!flg) PetscFunctionReturn(0); 1968273d9f13SBarry Smith 1969273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 1970b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1971273d9f13SBarry Smith if (nnz && newbs != bs) { 1972273d9f13SBarry Smith SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1973273d9f13SBarry Smith } 1974273d9f13SBarry Smith bs = newbs; 1975273d9f13SBarry Smith 1976273d9f13SBarry Smith mbs = B->m/bs; 1977273d9f13SBarry Smith nbs = B->n/bs; 1978273d9f13SBarry Smith bs2 = bs*bs; 1979273d9f13SBarry Smith 1980273d9f13SBarry Smith if (mbs*bs!=B->m || nbs*bs!=B->n) { 198135d8aa7fSBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1982273d9f13SBarry Smith } 1983273d9f13SBarry Smith 1984435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1985435da068SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1986273d9f13SBarry Smith if (nnz) { 1987273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1988273d9f13SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 19893a7fca6bSBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1990273d9f13SBarry Smith } 1991273d9f13SBarry Smith } 1992273d9f13SBarry Smith 1993273d9f13SBarry Smith b = (Mat_SeqBAIJ*)B->data; 1994b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 199554138f6bSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 199654138f6bSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1997273d9f13SBarry Smith if (!flg) { 1998273d9f13SBarry Smith switch (bs) { 1999273d9f13SBarry Smith case 1: 2000273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2001273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_1; 2002273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2003273d9f13SBarry Smith break; 2004273d9f13SBarry Smith case 2: 2005273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2006273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_2; 2007273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2008273d9f13SBarry Smith break; 2009273d9f13SBarry Smith case 3: 2010273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2011273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_3; 2012273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2013273d9f13SBarry Smith break; 2014273d9f13SBarry Smith case 4: 2015273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2016273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_4; 2017273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2018273d9f13SBarry Smith break; 2019273d9f13SBarry Smith case 5: 2020273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2021273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_5; 2022273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2023273d9f13SBarry Smith break; 2024273d9f13SBarry Smith case 6: 2025273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2026273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_6; 2027273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2028273d9f13SBarry Smith break; 2029273d9f13SBarry Smith case 7: 203054138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2031273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_7; 2032273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2033273d9f13SBarry Smith break; 2034273d9f13SBarry Smith default: 203554138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2036273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2037273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2038273d9f13SBarry Smith break; 2039273d9f13SBarry Smith } 2040273d9f13SBarry Smith } 2041273d9f13SBarry Smith b->bs = bs; 2042273d9f13SBarry Smith b->mbs = mbs; 2043273d9f13SBarry Smith b->nbs = nbs; 2044b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2045273d9f13SBarry Smith if (!nnz) { 2046435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2047273d9f13SBarry Smith else if (nz <= 0) nz = 1; 2048273d9f13SBarry Smith for (i=0; i<mbs; i++) b->imax[i] = nz; 2049273d9f13SBarry Smith nz = nz*mbs; 2050273d9f13SBarry Smith } else { 2051273d9f13SBarry Smith nz = 0; 2052273d9f13SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2053273d9f13SBarry Smith } 2054273d9f13SBarry Smith 2055273d9f13SBarry Smith /* allocate the matrix space */ 2056273d9f13SBarry Smith len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2057b0a32e0cSBarry Smith ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2058273d9f13SBarry Smith ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2059273d9f13SBarry Smith b->j = (int*)(b->a + nz*bs2); 2060273d9f13SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2061273d9f13SBarry Smith b->i = b->j + nz; 2062273d9f13SBarry Smith b->singlemalloc = PETSC_TRUE; 2063273d9f13SBarry Smith 2064273d9f13SBarry Smith b->i[0] = 0; 2065273d9f13SBarry Smith for (i=1; i<mbs+1; i++) { 2066273d9f13SBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 2067273d9f13SBarry Smith } 2068273d9f13SBarry Smith 2069273d9f13SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 207016d6aa21SSatish Balay ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2071b0a32e0cSBarry Smith PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2072273d9f13SBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2073273d9f13SBarry Smith 2074273d9f13SBarry Smith b->bs = bs; 2075273d9f13SBarry Smith b->bs2 = bs2; 2076273d9f13SBarry Smith b->mbs = mbs; 2077273d9f13SBarry Smith b->nz = 0; 2078273d9f13SBarry Smith b->maxnz = nz*bs2; 2079273d9f13SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2080273d9f13SBarry Smith PetscFunctionReturn(0); 2081273d9f13SBarry Smith } 20822593348eSBarry Smith 2083