1bba1ac68SSatish Balay /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 22593348eSBarry Smith 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 91a6a6d98SBarry Smith #include "src/inline/spops.h" 10325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 113b547af2SSatish Balay 12af674e45SBarry Smith /* 13af674e45SBarry Smith Special version for Fun3d sequential benchmark 14af674e45SBarry Smith */ 15af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 16af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 19af674e45SBarry Smith #endif 20af674e45SBarry Smith 21af674e45SBarry Smith #undef __FUNCT__ 22af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 234bb09213Spetsc void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24af674e45SBarry Smith { 25af674e45SBarry Smith Mat A = *AA; 26af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27a037b02bSBarry Smith int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 2838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 29a037b02bSBarry Smith int *aj=a->j,stepval; 304bb09213Spetsc PetscScalar *value = v; 314bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 32af674e45SBarry Smith 33af674e45SBarry Smith PetscFunctionBegin; 34af674e45SBarry Smith stepval = (n-1)*4; 35af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 36af674e45SBarry Smith row = im[k]; 37af674e45SBarry Smith rp = aj + ai[row]; 38af674e45SBarry Smith ap = aa + 16*ai[row]; 39af674e45SBarry Smith nrow = ailen[row]; 40af674e45SBarry Smith low = 0; 41af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 42af674e45SBarry Smith col = in[l]; 43af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 44af674e45SBarry Smith low = 0; high = nrow; 45af674e45SBarry Smith while (high-low > 7) { 46af674e45SBarry Smith t = (low+high)/2; 47af674e45SBarry Smith if (rp[t] > col) high = t; 48af674e45SBarry Smith else low = t; 49af674e45SBarry Smith } 50af674e45SBarry Smith for (i=low; i<high; i++) { 51af674e45SBarry Smith if (rp[i] > col) break; 52af674e45SBarry Smith if (rp[i] == col) { 53af674e45SBarry Smith bap = ap + 16*i; 54af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 55af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 56af674e45SBarry Smith bap[jj] += *value++; 57af674e45SBarry Smith } 58af674e45SBarry Smith } 59af674e45SBarry Smith goto noinsert2; 60af674e45SBarry Smith } 61af674e45SBarry Smith } 62af674e45SBarry Smith N = nrow++ - 1; 63af674e45SBarry Smith /* shift up all the later entries in this row */ 64af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 65af674e45SBarry Smith rp[ii+1] = rp[ii]; 66a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67af674e45SBarry Smith } 68af674e45SBarry Smith if (N >= i) { 69a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70af674e45SBarry Smith } 71af674e45SBarry Smith rp[i] = col; 72af674e45SBarry Smith bap = ap + 16*i; 73af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 74af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 75af674e45SBarry Smith bap[jj] = *value++; 76af674e45SBarry Smith } 77af674e45SBarry Smith } 78af674e45SBarry Smith noinsert2:; 79af674e45SBarry Smith low = i; 80af674e45SBarry Smith } 81af674e45SBarry Smith ailen[row] = nrow; 82af674e45SBarry Smith } 83af674e45SBarry Smith } 84af674e45SBarry Smith 85af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 86af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 87af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 89af674e45SBarry Smith #endif 90af674e45SBarry Smith 91af674e45SBarry Smith #undef __FUNCT__ 92af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 934bb09213Spetsc void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94af674e45SBarry Smith { 95af674e45SBarry Smith Mat A = *AA; 96af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97a037b02bSBarry Smith int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 9838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 99af674e45SBarry Smith int *aj=a->j,brow,bcol; 10038fde467SHong Zhang int ridx,cidx; 101af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 102af674e45SBarry Smith 103af674e45SBarry Smith PetscFunctionBegin; 104af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 105af674e45SBarry Smith row = im[k]; brow = row/4; 106af674e45SBarry Smith rp = aj + ai[brow]; 107af674e45SBarry Smith ap = aa + 16*ai[brow]; 108af674e45SBarry Smith nrow = ailen[brow]; 109af674e45SBarry Smith low = 0; 110af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 111af674e45SBarry Smith col = in[l]; bcol = col/4; 112af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 113af674e45SBarry Smith value = v[l + k*n]; 114af674e45SBarry Smith low = 0; high = nrow; 115af674e45SBarry Smith while (high-low > 7) { 116af674e45SBarry Smith t = (low+high)/2; 117af674e45SBarry Smith if (rp[t] > bcol) high = t; 118af674e45SBarry Smith else low = t; 119af674e45SBarry Smith } 120af674e45SBarry Smith for (i=low; i<high; i++) { 121af674e45SBarry Smith if (rp[i] > bcol) break; 122af674e45SBarry Smith if (rp[i] == bcol) { 123af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 124af674e45SBarry Smith *bap += value; 125af674e45SBarry Smith goto noinsert1; 126af674e45SBarry Smith } 127af674e45SBarry Smith } 128af674e45SBarry Smith N = nrow++ - 1; 129af674e45SBarry Smith /* shift up all the later entries in this row */ 130af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 131af674e45SBarry Smith rp[ii+1] = rp[ii]; 132a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133af674e45SBarry Smith } 134af674e45SBarry Smith if (N>=i) { 135a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136af674e45SBarry Smith } 137af674e45SBarry Smith rp[i] = bcol; 138af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 139af674e45SBarry Smith noinsert1:; 140af674e45SBarry Smith low = i; 141af674e45SBarry Smith } 142af674e45SBarry Smith ailen[brow] = nrow; 143af674e45SBarry Smith } 144af674e45SBarry Smith } 145af674e45SBarry Smith 14695d5f7c2SBarry Smith /* UGLY, ugly, ugly 14787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1483477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 14995d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 15095d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 15195d5f7c2SBarry Smith into the single precision data structures. 15295d5f7c2SBarry Smith */ 15395d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 154ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 15595d5f7c2SBarry Smith #else 15695d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 15795d5f7c2SBarry Smith #endif 15804929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 15904929863SHong Zhang EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 16004929863SHong Zhang #endif 16195d5f7c2SBarry Smith 1622d61bbb3SSatish Balay #define CHUNKSIZE 10 163de6a44a3SBarry Smith 164be5855fcSBarry Smith /* 165be5855fcSBarry Smith Checks for missing diagonals 166be5855fcSBarry Smith */ 1674a2ae208SSatish Balay #undef __FUNCT__ 1684a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169c4992f7dSBarry Smith int MatMissingDiagonal_SeqBAIJ(Mat A) 170be5855fcSBarry Smith { 171be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172883fce79SBarry Smith int *diag,*jj = a->j,i,ierr; 173be5855fcSBarry Smith 174be5855fcSBarry Smith PetscFunctionBegin; 175c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176883fce79SBarry Smith diag = a->diag; 1770e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 178be5855fcSBarry Smith if (jj[diag[i]] != i) { 17929bbc08cSBarry Smith SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180be5855fcSBarry Smith } 181be5855fcSBarry Smith } 182be5855fcSBarry Smith PetscFunctionReturn(0); 183be5855fcSBarry Smith } 184be5855fcSBarry Smith 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187c4992f7dSBarry Smith int MatMarkDiagonal_SeqBAIJ(Mat A) 188de6a44a3SBarry Smith { 189de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 19082502324SSatish Balay int i,j,*diag,m = a->mbs,ierr; 191de6a44a3SBarry Smith 1923a40ed3dSBarry Smith PetscFunctionBegin; 193883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 194883fce79SBarry Smith 195b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196b0a32e0cSBarry Smith PetscLogObjectMemory(A,(m+1)*sizeof(int)); 1977fc0212eSBarry Smith for (i=0; i<m; i++) { 198ecc615b2SBarry Smith diag[i] = a->i[i+1]; 199de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 200de6a44a3SBarry Smith if (a->j[j] == i) { 201de6a44a3SBarry Smith diag[i] = j; 202de6a44a3SBarry Smith break; 203de6a44a3SBarry Smith } 204de6a44a3SBarry Smith } 205de6a44a3SBarry Smith } 206de6a44a3SBarry Smith a->diag = diag; 2073a40ed3dSBarry Smith PetscFunctionReturn(0); 208de6a44a3SBarry Smith } 2092593348eSBarry Smith 2102593348eSBarry Smith 211ca44d042SBarry Smith EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 2123b2fbd54SBarry Smith 2134a2ae208SSatish Balay #undef __FUNCT__ 2144a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 2150e6d2581SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2163b2fbd54SBarry Smith { 2173b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2183b2fbd54SBarry Smith int ierr,n = a->mbs,i; 2193b2fbd54SBarry Smith 2203a40ed3dSBarry Smith PetscFunctionBegin; 2213b2fbd54SBarry Smith *nn = n; 2223a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2233b2fbd54SBarry Smith if (symmetric) { 2243b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 2253b2fbd54SBarry Smith } else if (oshift == 1) { 2263b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 227435da068SBarry Smith int nz = a->i[n]; 2283b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 2293b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 2303b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2313b2fbd54SBarry Smith } else { 2323b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2333b2fbd54SBarry Smith } 2343b2fbd54SBarry Smith 2353a40ed3dSBarry Smith PetscFunctionReturn(0); 2363b2fbd54SBarry Smith } 2373b2fbd54SBarry Smith 2384a2ae208SSatish Balay #undef __FUNCT__ 2394a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240435da068SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2413b2fbd54SBarry Smith { 2423b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243606d414cSSatish Balay int i,n = a->mbs,ierr; 2443b2fbd54SBarry Smith 2453a40ed3dSBarry Smith PetscFunctionBegin; 2463a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2473b2fbd54SBarry Smith if (symmetric) { 248606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 249606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 250af5da2bfSBarry Smith } else if (oshift == 1) { 251435da068SBarry Smith int nz = a->i[n]-1; 2523b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 2533b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 2543b2fbd54SBarry Smith } 2553a40ed3dSBarry Smith PetscFunctionReturn(0); 2563b2fbd54SBarry Smith } 2573b2fbd54SBarry Smith 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 2602d61bbb3SSatish Balay int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 2612d61bbb3SSatish Balay { 2622d61bbb3SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2632d61bbb3SSatish Balay 2642d61bbb3SSatish Balay PetscFunctionBegin; 2652d61bbb3SSatish Balay *bs = baij->bs; 2662d61bbb3SSatish Balay PetscFunctionReturn(0); 2672d61bbb3SSatish Balay } 2682d61bbb3SSatish Balay 2692d61bbb3SSatish Balay 2704a2ae208SSatish Balay #undef __FUNCT__ 2714a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 272e1311b90SBarry Smith int MatDestroy_SeqBAIJ(Mat A) 2732d61bbb3SSatish Balay { 2742d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275e51c0b9cSSatish Balay int ierr; 2762d61bbb3SSatish Balay 277433994e6SBarry Smith PetscFunctionBegin; 278aa482453SBarry Smith #if defined(PETSC_USE_LOG) 279b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 2802d61bbb3SSatish Balay #endif 281606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 282606d414cSSatish Balay if (!a->singlemalloc) { 283606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 284606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 285606d414cSSatish Balay } 286c38d4ed2SBarry Smith if (a->row) { 287c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 288c38d4ed2SBarry Smith } 289c38d4ed2SBarry Smith if (a->col) { 290c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 291c38d4ed2SBarry Smith } 292606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 300563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301563b5814SBarry Smith #endif 302606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 3032d61bbb3SSatish Balay PetscFunctionReturn(0); 3042d61bbb3SSatish Balay } 3052d61bbb3SSatish Balay 3064a2ae208SSatish Balay #undef __FUNCT__ 3074a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 3082d61bbb3SSatish Balay int MatSetOption_SeqBAIJ(Mat A,MatOption op) 3092d61bbb3SSatish Balay { 3102d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3112d61bbb3SSatish Balay 3122d61bbb3SSatish Balay PetscFunctionBegin; 313aa275fccSKris Buschelman switch (op) { 314aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 315aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 316aa275fccSKris Buschelman break; 317aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 318aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 319aa275fccSKris Buschelman break; 320aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 321aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 322aa275fccSKris Buschelman break; 323aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 324aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 325aa275fccSKris Buschelman break; 326aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 327aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 328aa275fccSKris Buschelman break; 329aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 330aa275fccSKris Buschelman a->nonew = 1; 331aa275fccSKris Buschelman break; 332aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 333aa275fccSKris Buschelman a->nonew = -1; 334aa275fccSKris Buschelman break; 335aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 336aa275fccSKris Buschelman a->nonew = -2; 337aa275fccSKris Buschelman break; 338aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 339aa275fccSKris Buschelman a->nonew = 0; 340aa275fccSKris Buschelman break; 341aa275fccSKris Buschelman case MAT_ROWS_SORTED: 342aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 343aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 344aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 345aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 346b0a32e0cSBarry Smith PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347aa275fccSKris Buschelman break; 348aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 34929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350bd648c37SKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 351bd648c37SKris Buschelman if (a->bs==4) { 35294ee7fc8SKris Buschelman PetscTruth sse_enabled_local,sse_enabled_global; 353bd648c37SKris Buschelman int ierr; 35494ee7fc8SKris Buschelman 35594ee7fc8SKris Buschelman sse_enabled_local = PETSC_FALSE; 35694ee7fc8SKris Buschelman sse_enabled_global = PETSC_FALSE; 35794ee7fc8SKris Buschelman 35894ee7fc8SKris Buschelman ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 359e64df4b9SKris Buschelman #if defined(PETSC_HAVE_SSE) 36094ee7fc8SKris Buschelman if (sse_enabled_local) { 36154138f6bSKris Buschelman a->single_precision_solves = PETSC_TRUE; 36254138f6bSKris Buschelman A->ops->solve = MatSolve_SeqBAIJ_Update; 36354138f6bSKris Buschelman A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 364cf242676SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 36554138f6bSKris Buschelman break; 36694ee7fc8SKris Buschelman } else { 36794ee7fc8SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 3688661488fSKris Buschelman } 369e64df4b9SKris Buschelman #else 370bd648c37SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 371e64df4b9SKris Buschelman #endif 372bd648c37SKris Buschelman } 373bd648c37SKris Buschelman break; 374aa275fccSKris Buschelman default: 37529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 3762d61bbb3SSatish Balay } 3772d61bbb3SSatish Balay PetscFunctionReturn(0); 3782d61bbb3SSatish Balay } 3792d61bbb3SSatish Balay 3804a2ae208SSatish Balay #undef __FUNCT__ 3814a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 38287828ca2SBarry Smith int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 3832d61bbb3SSatish Balay { 3842d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38582502324SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 3863f1db9ecSBarry Smith MatScalar *aa,*aa_i; 38787828ca2SBarry Smith PetscScalar *v_i; 3882d61bbb3SSatish Balay 3892d61bbb3SSatish Balay PetscFunctionBegin; 3902d61bbb3SSatish Balay bs = a->bs; 3912d61bbb3SSatish Balay ai = a->i; 3922d61bbb3SSatish Balay aj = a->j; 3932d61bbb3SSatish Balay aa = a->a; 3942d61bbb3SSatish Balay bs2 = a->bs2; 3952d61bbb3SSatish Balay 396273d9f13SBarry Smith if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 3972d61bbb3SSatish Balay 3982d61bbb3SSatish Balay bn = row/bs; /* Block number */ 3992d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 4002d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 4012d61bbb3SSatish Balay *nz = bs*M; 4022d61bbb3SSatish Balay 4032d61bbb3SSatish Balay if (v) { 4042d61bbb3SSatish Balay *v = 0; 4052d61bbb3SSatish Balay if (*nz) { 40687828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 4072d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4082d61bbb3SSatish Balay v_i = *v + i*bs; 4092d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 4102d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 4112d61bbb3SSatish Balay } 4122d61bbb3SSatish Balay } 4132d61bbb3SSatish Balay } 4142d61bbb3SSatish Balay 4152d61bbb3SSatish Balay if (idx) { 4162d61bbb3SSatish Balay *idx = 0; 4172d61bbb3SSatish Balay if (*nz) { 418b0a32e0cSBarry Smith ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 4192d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4202d61bbb3SSatish Balay idx_i = *idx + i*bs; 4212d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 4222d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 4232d61bbb3SSatish Balay } 4242d61bbb3SSatish Balay } 4252d61bbb3SSatish Balay } 4262d61bbb3SSatish Balay PetscFunctionReturn(0); 4272d61bbb3SSatish Balay } 4282d61bbb3SSatish Balay 4294a2ae208SSatish Balay #undef __FUNCT__ 4304a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 43187828ca2SBarry Smith int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 4322d61bbb3SSatish Balay { 433606d414cSSatish Balay int ierr; 434606d414cSSatish Balay 4352d61bbb3SSatish Balay PetscFunctionBegin; 436606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 437606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 4382d61bbb3SSatish Balay PetscFunctionReturn(0); 4392d61bbb3SSatish Balay } 4402d61bbb3SSatish Balay 4414a2ae208SSatish Balay #undef __FUNCT__ 4424a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 4432d61bbb3SSatish Balay int MatTranspose_SeqBAIJ(Mat A,Mat *B) 4442d61bbb3SSatish Balay { 4452d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4462d61bbb3SSatish Balay Mat C; 4472d61bbb3SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 448273d9f13SBarry Smith int *rows,*cols,bs2=a->bs2; 44987828ca2SBarry Smith PetscScalar *array; 4502d61bbb3SSatish Balay 4512d61bbb3SSatish Balay PetscFunctionBegin; 45229bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 453b0a32e0cSBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 454549d3d68SSatish Balay ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 4552d61bbb3SSatish Balay 456375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 45787828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 45887828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 459375fe846SBarry Smith #else 4603eda8832SBarry Smith array = a->a; 461375fe846SBarry Smith #endif 462375fe846SBarry Smith 4632d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 464273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 465606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 466b0a32e0cSBarry Smith ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 4672d61bbb3SSatish Balay cols = rows + bs; 4682d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4692d61bbb3SSatish Balay cols[0] = i*bs; 4702d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 4712d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 4722d61bbb3SSatish Balay for (j=0; j<len; j++) { 4732d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 4742d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 4752d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 4762d61bbb3SSatish Balay array += bs2; 4772d61bbb3SSatish Balay } 4782d61bbb3SSatish Balay } 479606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 480375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 481375fe846SBarry Smith ierr = PetscFree(array); 482375fe846SBarry Smith #endif 4832d61bbb3SSatish Balay 4842d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4852d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4862d61bbb3SSatish Balay 487c4992f7dSBarry Smith if (B) { 4882d61bbb3SSatish Balay *B = C; 4892d61bbb3SSatish Balay } else { 490273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 4912d61bbb3SSatish Balay } 4922d61bbb3SSatish Balay PetscFunctionReturn(0); 4932d61bbb3SSatish Balay } 4942d61bbb3SSatish Balay 4954a2ae208SSatish Balay #undef __FUNCT__ 4964a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 497b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 4982593348eSBarry Smith { 499b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 5009df24120SSatish Balay int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 50187828ca2SBarry Smith PetscScalar *aa; 502ce6f0cecSBarry Smith FILE *file; 5032593348eSBarry Smith 5043a40ed3dSBarry Smith PetscFunctionBegin; 505b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 506b0a32e0cSBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 507552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 5083b2fbd54SBarry Smith 509273d9f13SBarry Smith col_lens[1] = A->m; 510273d9f13SBarry Smith col_lens[2] = A->n; 5117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 5122593348eSBarry Smith 5132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 514b6490206SBarry Smith count = 0; 515b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 516b6490206SBarry Smith for (j=0; j<bs; j++) { 517b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 518b6490206SBarry Smith } 5192593348eSBarry Smith } 520273d9f13SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 521606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 5222593348eSBarry Smith 5232593348eSBarry Smith /* store column indices (zero start index) */ 524b0a32e0cSBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 525b6490206SBarry Smith count = 0; 526b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 527b6490206SBarry Smith for (j=0; j<bs; j++) { 528b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 529b6490206SBarry Smith for (l=0; l<bs; l++) { 530b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 5312593348eSBarry Smith } 5322593348eSBarry Smith } 533b6490206SBarry Smith } 534b6490206SBarry Smith } 5350752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 536606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 5372593348eSBarry Smith 5382593348eSBarry Smith /* store nonzero values */ 53987828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 540b6490206SBarry Smith count = 0; 541b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 542b6490206SBarry Smith for (j=0; j<bs; j++) { 543b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 544b6490206SBarry Smith for (l=0; l<bs; l++) { 5457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 546b6490206SBarry Smith } 547b6490206SBarry Smith } 548b6490206SBarry Smith } 549b6490206SBarry Smith } 5500752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 551606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 552ce6f0cecSBarry Smith 553b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 554ce6f0cecSBarry Smith if (file) { 555ce6f0cecSBarry Smith fprintf(file,"-matload_block_size %d\n",a->bs); 556ce6f0cecSBarry Smith } 5573a40ed3dSBarry Smith PetscFunctionReturn(0); 5582593348eSBarry Smith } 5592593348eSBarry Smith 56004929863SHong Zhang extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 56104929863SHong Zhang 5624a2ae208SSatish Balay #undef __FUNCT__ 5634a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 564b0a32e0cSBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 5652593348eSBarry Smith { 566b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 567fb9695e5SSatish Balay int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 568f3ef73ceSBarry Smith PetscViewerFormat format; 5692593348eSBarry Smith 5703a40ed3dSBarry Smith PetscFunctionBegin; 571b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 572*456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 573b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 574fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 575bcd9e38bSBarry Smith Mat aij; 576bcd9e38bSBarry Smith ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 577bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 578bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 57904929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 58004929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 58104929863SHong Zhang ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 58204929863SHong Zhang #endif 58304929863SHong Zhang PetscFunctionReturn(0); 584fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 585b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 58644cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 58744cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 588b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 58944cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 59044cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 591aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 5920e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 5940e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5950e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59662b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 5970e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5980e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59962b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6000ef38995SBarry Smith } 60144cd7ae7SLois Curfman McInnes #else 6020ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 60362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 6040ef38995SBarry Smith } 60544cd7ae7SLois Curfman McInnes #endif 60644cd7ae7SLois Curfman McInnes } 60744cd7ae7SLois Curfman McInnes } 608b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 60944cd7ae7SLois Curfman McInnes } 61044cd7ae7SLois Curfman McInnes } 611b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 6120ef38995SBarry Smith } else { 613b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 614b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 615b6490206SBarry Smith for (j=0; j<bs; j++) { 616b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 617b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 618b6490206SBarry Smith for (l=0; l<bs; l++) { 619aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 6200e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 62162b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 6220e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6230e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 62462b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 6250e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6260ef38995SBarry Smith } else { 62762b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 62888685aaeSLois Curfman McInnes } 62988685aaeSLois Curfman McInnes #else 63062b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 63188685aaeSLois Curfman McInnes #endif 6322593348eSBarry Smith } 6332593348eSBarry Smith } 634b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 6352593348eSBarry Smith } 6362593348eSBarry Smith } 637b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 638b6490206SBarry Smith } 639b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6403a40ed3dSBarry Smith PetscFunctionReturn(0); 6412593348eSBarry Smith } 6422593348eSBarry Smith 6434a2ae208SSatish Balay #undef __FUNCT__ 6444a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 645b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 6463270192aSSatish Balay { 64777ed5343SBarry Smith Mat A = (Mat) Aa; 6483270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 649b65db4caSBarry Smith int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 6500e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 6513f1db9ecSBarry Smith MatScalar *aa; 652b0a32e0cSBarry Smith PetscViewer viewer; 6533270192aSSatish Balay 6543a40ed3dSBarry Smith PetscFunctionBegin; 6553270192aSSatish Balay 656b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 65777ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 65877ed5343SBarry Smith 659b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 66077ed5343SBarry Smith 6613270192aSSatish Balay /* loop over matrix elements drawing boxes */ 662b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 6633270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6643270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 665273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6663270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6673270192aSSatish Balay aa = a->a + j*bs2; 6683270192aSSatish Balay for (k=0; k<bs; k++) { 6693270192aSSatish Balay for (l=0; l<bs; l++) { 6700e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 671b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6723270192aSSatish Balay } 6733270192aSSatish Balay } 6743270192aSSatish Balay } 6753270192aSSatish Balay } 676b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 6773270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6783270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 679273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6803270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6813270192aSSatish Balay aa = a->a + j*bs2; 6823270192aSSatish Balay for (k=0; k<bs; k++) { 6833270192aSSatish Balay for (l=0; l<bs; l++) { 6840e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 685b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6863270192aSSatish Balay } 6873270192aSSatish Balay } 6883270192aSSatish Balay } 6893270192aSSatish Balay } 6903270192aSSatish Balay 691b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 6923270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6933270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 694273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6953270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6963270192aSSatish Balay aa = a->a + j*bs2; 6973270192aSSatish Balay for (k=0; k<bs; k++) { 6983270192aSSatish Balay for (l=0; l<bs; l++) { 6990e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 700b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 7013270192aSSatish Balay } 7023270192aSSatish Balay } 7033270192aSSatish Balay } 7043270192aSSatish Balay } 70577ed5343SBarry Smith PetscFunctionReturn(0); 70677ed5343SBarry Smith } 7073270192aSSatish Balay 7084a2ae208SSatish Balay #undef __FUNCT__ 7094a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 710b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 71177ed5343SBarry Smith { 7127dce120fSSatish Balay int ierr; 7130e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 714b0a32e0cSBarry Smith PetscDraw draw; 71577ed5343SBarry Smith PetscTruth isnull; 7163270192aSSatish Balay 71777ed5343SBarry Smith PetscFunctionBegin; 71877ed5343SBarry Smith 719b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 720b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 72177ed5343SBarry Smith 72277ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 723273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 72477ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 725b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 726b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 72777ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 7283a40ed3dSBarry Smith PetscFunctionReturn(0); 7293270192aSSatish Balay } 7303270192aSSatish Balay 7314a2ae208SSatish Balay #undef __FUNCT__ 7324a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 733b0a32e0cSBarry Smith int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 7342593348eSBarry Smith { 73519bcc07fSBarry Smith int ierr; 7366831982aSBarry Smith PetscTruth issocket,isascii,isbinary,isdraw; 7372593348eSBarry Smith 7383a40ed3dSBarry Smith PetscFunctionBegin; 739b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 740b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 741fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 742fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 7430f5bd95cSBarry Smith if (issocket) { 74429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 7450f5bd95cSBarry Smith } else if (isascii){ 7463a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 7470f5bd95cSBarry Smith } else if (isbinary) { 7483a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 7490f5bd95cSBarry Smith } else if (isdraw) { 7503a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 7515cd90555SBarry Smith } else { 75229bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 7532593348eSBarry Smith } 7543a40ed3dSBarry Smith PetscFunctionReturn(0); 7552593348eSBarry Smith } 756b6490206SBarry Smith 757cd0e1443SSatish Balay 7584a2ae208SSatish Balay #undef __FUNCT__ 7594a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 76087828ca2SBarry Smith int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 761cd0e1443SSatish Balay { 762cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7632d61bbb3SSatish Balay int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 7642d61bbb3SSatish Balay int *ai = a->i,*ailen = a->ilen; 7652d61bbb3SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 7663f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 767cd0e1443SSatish Balay 7683a40ed3dSBarry Smith PetscFunctionBegin; 7692d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 770cd0e1443SSatish Balay row = im[k]; brow = row/bs; 77129bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 772273d9f13SBarry Smith if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 7732d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 7742c3acbe9SBarry Smith nrow = ailen[brow]; 7752d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 77629bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 777273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 7782d61bbb3SSatish Balay col = in[l] ; 7792d61bbb3SSatish Balay bcol = col/bs; 7802d61bbb3SSatish Balay cidx = col%bs; 7812d61bbb3SSatish Balay ridx = row%bs; 7822d61bbb3SSatish Balay high = nrow; 7832d61bbb3SSatish Balay low = 0; /* assume unsorted */ 7842d61bbb3SSatish Balay while (high-low > 5) { 785cd0e1443SSatish Balay t = (low+high)/2; 786cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 787cd0e1443SSatish Balay else low = t; 788cd0e1443SSatish Balay } 789cd0e1443SSatish Balay for (i=low; i<high; i++) { 790cd0e1443SSatish Balay if (rp[i] > bcol) break; 791cd0e1443SSatish Balay if (rp[i] == bcol) { 7922d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 7932d61bbb3SSatish Balay goto finished; 794cd0e1443SSatish Balay } 795cd0e1443SSatish Balay } 7962d61bbb3SSatish Balay *v++ = zero; 7972d61bbb3SSatish Balay finished:; 798cd0e1443SSatish Balay } 799cd0e1443SSatish Balay } 8003a40ed3dSBarry Smith PetscFunctionReturn(0); 801cd0e1443SSatish Balay } 802cd0e1443SSatish Balay 80395d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 8044a2ae208SSatish Balay #undef __FUNCT__ 8054a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 80687828ca2SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 80795d5f7c2SBarry Smith { 808563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 809563b5814SBarry Smith int ierr,i,N = m*n*b->bs2; 810563b5814SBarry Smith MatScalar *vsingle; 81195d5f7c2SBarry Smith 81295d5f7c2SBarry Smith PetscFunctionBegin; 813563b5814SBarry Smith if (N > b->setvalueslen) { 814563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 815b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 816563b5814SBarry Smith b->setvalueslen = N; 817563b5814SBarry Smith } 818563b5814SBarry Smith vsingle = b->setvaluescopy; 81995d5f7c2SBarry Smith for (i=0; i<N; i++) { 82095d5f7c2SBarry Smith vsingle[i] = v[i]; 82195d5f7c2SBarry Smith } 82295d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 82395d5f7c2SBarry Smith PetscFunctionReturn(0); 82495d5f7c2SBarry Smith } 82595d5f7c2SBarry Smith #endif 82695d5f7c2SBarry Smith 8272d61bbb3SSatish Balay 8284a2ae208SSatish Balay #undef __FUNCT__ 8294a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 83095d5f7c2SBarry Smith int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 83192c4ed94SBarry Smith { 83292c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8338a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 834273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 835549d3d68SSatish Balay int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 836273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 83795d5f7c2SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 83892c4ed94SBarry Smith 8393a40ed3dSBarry Smith PetscFunctionBegin; 8400e324ae4SSatish Balay if (roworiented) { 8410e324ae4SSatish Balay stepval = (n-1)*bs; 8420e324ae4SSatish Balay } else { 8430e324ae4SSatish Balay stepval = (m-1)*bs; 8440e324ae4SSatish Balay } 84592c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 84692c4ed94SBarry Smith row = im[k]; 8475ef9f2a5SBarry Smith if (row < 0) continue; 848aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 84929bbc08cSBarry Smith if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 85092c4ed94SBarry Smith #endif 85192c4ed94SBarry Smith rp = aj + ai[row]; 85292c4ed94SBarry Smith ap = aa + bs2*ai[row]; 85392c4ed94SBarry Smith rmax = imax[row]; 85492c4ed94SBarry Smith nrow = ailen[row]; 85592c4ed94SBarry Smith low = 0; 85692c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 8575ef9f2a5SBarry Smith if (in[l] < 0) continue; 858aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 85929bbc08cSBarry Smith if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 86092c4ed94SBarry Smith #endif 86192c4ed94SBarry Smith col = in[l]; 86292c4ed94SBarry Smith if (roworiented) { 8630e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 8640e324ae4SSatish Balay } else { 8650e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 86692c4ed94SBarry Smith } 86792c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 86892c4ed94SBarry Smith while (high-low > 7) { 86992c4ed94SBarry Smith t = (low+high)/2; 87092c4ed94SBarry Smith if (rp[t] > col) high = t; 87192c4ed94SBarry Smith else low = t; 87292c4ed94SBarry Smith } 87392c4ed94SBarry Smith for (i=low; i<high; i++) { 87492c4ed94SBarry Smith if (rp[i] > col) break; 87592c4ed94SBarry Smith if (rp[i] == col) { 8768a84c255SSatish Balay bap = ap + bs2*i; 8770e324ae4SSatish Balay if (roworiented) { 8788a84c255SSatish Balay if (is == ADD_VALUES) { 879dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 880dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8818a84c255SSatish Balay bap[jj] += *value++; 882dd9472c6SBarry Smith } 883dd9472c6SBarry Smith } 8840e324ae4SSatish Balay } else { 885dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 886dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8870e324ae4SSatish Balay bap[jj] = *value++; 8888a84c255SSatish Balay } 889dd9472c6SBarry Smith } 890dd9472c6SBarry Smith } 8910e324ae4SSatish Balay } else { 8920e324ae4SSatish Balay if (is == ADD_VALUES) { 893dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 894dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8950e324ae4SSatish Balay *bap++ += *value++; 896dd9472c6SBarry Smith } 897dd9472c6SBarry Smith } 8980e324ae4SSatish Balay } else { 899dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 900dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9010e324ae4SSatish Balay *bap++ = *value++; 9020e324ae4SSatish Balay } 9038a84c255SSatish Balay } 904dd9472c6SBarry Smith } 905dd9472c6SBarry Smith } 906f1241b54SBarry Smith goto noinsert2; 90792c4ed94SBarry Smith } 90892c4ed94SBarry Smith } 90989280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 91029bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91192c4ed94SBarry Smith if (nrow >= rmax) { 91292c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 91392c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 9143f1db9ecSBarry Smith MatScalar *new_a; 91592c4ed94SBarry Smith 91629bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91789280ab3SLois Curfman McInnes 91892c4ed94SBarry Smith /* malloc new storage space */ 9193f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 920b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 92192c4ed94SBarry Smith new_j = (int*)(new_a + bs2*new_nz); 92292c4ed94SBarry Smith new_i = new_j + new_nz; 92392c4ed94SBarry Smith 92492c4ed94SBarry Smith /* copy over old data into new slots */ 92592c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 92692c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 927549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 92892c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 929549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 930549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 931549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 932549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 93392c4ed94SBarry Smith /* free up old matrix storage */ 934606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 935606d414cSSatish Balay if (!a->singlemalloc) { 936606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 937606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 938606d414cSSatish Balay } 93992c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 940c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 94192c4ed94SBarry Smith 94292c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 94392c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 944b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 94592c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 94692c4ed94SBarry Smith a->reallocs++; 94792c4ed94SBarry Smith a->nz++; 94892c4ed94SBarry Smith } 94992c4ed94SBarry Smith N = nrow++ - 1; 95092c4ed94SBarry Smith /* shift up all the later entries in this row */ 95192c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 95292c4ed94SBarry Smith rp[ii+1] = rp[ii]; 953549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 95492c4ed94SBarry Smith } 955549d3d68SSatish Balay if (N >= i) { 956549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 957549d3d68SSatish Balay } 95892c4ed94SBarry Smith rp[i] = col; 9598a84c255SSatish Balay bap = ap + bs2*i; 9600e324ae4SSatish Balay if (roworiented) { 961dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 962dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 9630e324ae4SSatish Balay bap[jj] = *value++; 964dd9472c6SBarry Smith } 965dd9472c6SBarry Smith } 9660e324ae4SSatish Balay } else { 967dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 968dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9690e324ae4SSatish Balay *bap++ = *value++; 9700e324ae4SSatish Balay } 971dd9472c6SBarry Smith } 972dd9472c6SBarry Smith } 973f1241b54SBarry Smith noinsert2:; 97492c4ed94SBarry Smith low = i; 97592c4ed94SBarry Smith } 97692c4ed94SBarry Smith ailen[row] = nrow; 97792c4ed94SBarry Smith } 9783a40ed3dSBarry Smith PetscFunctionReturn(0); 97992c4ed94SBarry Smith } 98092c4ed94SBarry Smith 9814a2ae208SSatish Balay #undef __FUNCT__ 9824a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 9838f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 984584200bdSSatish Balay { 985584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 986584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 987273d9f13SBarry Smith int m = A->m,*ip,N,*ailen = a->ilen; 988549d3d68SSatish Balay int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 9893f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 990a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 991a56a16c8SHong Zhang PetscTruth flag; 992a56a16c8SHong Zhang #endif 993584200bdSSatish Balay 9943a40ed3dSBarry Smith PetscFunctionBegin; 9953a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 996584200bdSSatish Balay 99743ee02c3SBarry Smith if (m) rmax = ailen[0]; 998584200bdSSatish Balay for (i=1; i<mbs; i++) { 999584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1000584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1001d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1002584200bdSSatish Balay if (fshift) { 1003a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1004584200bdSSatish Balay N = ailen[i]; 1005584200bdSSatish Balay for (j=0; j<N; j++) { 1006584200bdSSatish Balay ip[j-fshift] = ip[j]; 1007549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1008584200bdSSatish Balay } 1009584200bdSSatish Balay } 1010584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1011584200bdSSatish Balay } 1012584200bdSSatish Balay if (mbs) { 1013584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1014584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1015584200bdSSatish Balay } 1016584200bdSSatish Balay /* reset ilen and imax for each row */ 1017584200bdSSatish Balay for (i=0; i<mbs; i++) { 1018584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1019584200bdSSatish Balay } 1020a7c10996SSatish Balay a->nz = ai[mbs]; 1021584200bdSSatish Balay 1022584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1023584200bdSSatish Balay if (fshift && a->diag) { 1024606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 1025b424e231SHong Zhang PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1026584200bdSSatish Balay a->diag = 0; 1027584200bdSSatish Balay } 1028bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1029bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1030b0a32e0cSBarry Smith PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1031e2f3b5e9SSatish Balay a->reallocs = 0; 10320e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1033cf4441caSHong Zhang 1034a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 1035a56a16c8SHong Zhang ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1036a56a16c8SHong Zhang if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1037a56a16c8SHong Zhang #endif 1038a56a16c8SHong Zhang 10393a40ed3dSBarry Smith PetscFunctionReturn(0); 1040584200bdSSatish Balay } 1041584200bdSSatish Balay 10425a838052SSatish Balay 1043bea157c4SSatish Balay 1044bea157c4SSatish Balay /* 1045bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1046bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1047bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1048bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1049bea157c4SSatish Balay */ 10504a2ae208SSatish Balay #undef __FUNCT__ 10514a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1052bea157c4SSatish Balay static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1053d9b7c43dSSatish Balay { 1054bea157c4SSatish Balay int i,j,k,row; 1055bea157c4SSatish Balay PetscTruth flg; 10563a40ed3dSBarry Smith 1057433994e6SBarry Smith PetscFunctionBegin; 1058bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1059bea157c4SSatish Balay row = idx[i]; 1060bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1061bea157c4SSatish Balay sizes[j] = 1; 1062bea157c4SSatish Balay i++; 1063e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1064bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1065bea157c4SSatish Balay i++; 1066bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1067bea157c4SSatish Balay flg = PETSC_TRUE; 1068bea157c4SSatish Balay for (k=1; k<bs; k++) { 1069bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1070bea157c4SSatish Balay flg = PETSC_FALSE; 1071bea157c4SSatish Balay break; 1072d9b7c43dSSatish Balay } 1073bea157c4SSatish Balay } 1074bea157c4SSatish Balay if (flg == PETSC_TRUE) { /* No break in the bs */ 1075bea157c4SSatish Balay sizes[j] = bs; 1076bea157c4SSatish Balay i+= bs; 1077bea157c4SSatish Balay } else { 1078bea157c4SSatish Balay sizes[j] = 1; 1079bea157c4SSatish Balay i++; 1080bea157c4SSatish Balay } 1081bea157c4SSatish Balay } 1082bea157c4SSatish Balay } 1083bea157c4SSatish Balay *bs_max = j; 10843a40ed3dSBarry Smith PetscFunctionReturn(0); 1085d9b7c43dSSatish Balay } 1086d9b7c43dSSatish Balay 10874a2ae208SSatish Balay #undef __FUNCT__ 10884a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 108987828ca2SBarry Smith int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1090d9b7c43dSSatish Balay { 1091d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1092b6815fffSBarry Smith int ierr,i,j,k,count,is_n,*is_idx,*rows; 1093bea157c4SSatish Balay int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 109487828ca2SBarry Smith PetscScalar zero = 0.0; 10953f1db9ecSBarry Smith MatScalar *aa; 1096d9b7c43dSSatish Balay 10973a40ed3dSBarry Smith PetscFunctionBegin; 1098d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1099b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1100d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1101d9b7c43dSSatish Balay 1102bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1103b0a32e0cSBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1104bea157c4SSatish Balay sizes = rows + is_n; 1105bea157c4SSatish Balay 1106563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1107bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1108bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1109dffd3267SBarry Smith if (baij->keepzeroedrows) { 1110dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1111dffd3267SBarry Smith bs_max = is_n; 1112dffd3267SBarry Smith } else { 1113bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1114dffd3267SBarry Smith } 1115b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1116bea157c4SSatish Balay 1117bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1118bea157c4SSatish Balay row = rows[j]; 1119273d9f13SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1120bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1121bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1122dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1123bea157c4SSatish Balay if (diag) { 1124bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1125bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1126bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1127bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1128a07cd24cSSatish Balay } 1129563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1130bea157c4SSatish Balay for (k=0; k<bs; k++) { 1131bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1132bea157c4SSatish Balay } 1133bea157c4SSatish Balay } else { /* (!diag) */ 1134bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1135bea157c4SSatish Balay } /* end (!diag) */ 1136bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1137aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 113829bbc08cSBarry Smith if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1139bea157c4SSatish Balay #endif 1140bea157c4SSatish Balay for (k=0; k<count; k++) { 1141d9b7c43dSSatish Balay aa[0] = zero; 1142d9b7c43dSSatish Balay aa += bs; 1143d9b7c43dSSatish Balay } 1144d9b7c43dSSatish Balay if (diag) { 1145f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1146d9b7c43dSSatish Balay } 1147d9b7c43dSSatish Balay } 1148bea157c4SSatish Balay } 1149bea157c4SSatish Balay 1150606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 11519a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11523a40ed3dSBarry Smith PetscFunctionReturn(0); 1153d9b7c43dSSatish Balay } 11541c351548SSatish Balay 11554a2ae208SSatish Balay #undef __FUNCT__ 11564a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 115787828ca2SBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 11582d61bbb3SSatish Balay { 11592d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11602d61bbb3SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1161273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 11622d61bbb3SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1163549d3d68SSatish Balay int ridx,cidx,bs2=a->bs2,ierr; 1164273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 11653f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 11662d61bbb3SSatish Balay 11672d61bbb3SSatish Balay PetscFunctionBegin; 11682d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 11692d61bbb3SSatish Balay row = im[k]; brow = row/bs; 11705ef9f2a5SBarry Smith if (row < 0) continue; 1171aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1172273d9f13SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 11732d61bbb3SSatish Balay #endif 11742d61bbb3SSatish Balay rp = aj + ai[brow]; 11752d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 11762d61bbb3SSatish Balay rmax = imax[brow]; 11772d61bbb3SSatish Balay nrow = ailen[brow]; 11782d61bbb3SSatish Balay low = 0; 11792d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 11805ef9f2a5SBarry Smith if (in[l] < 0) continue; 1181aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1182273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 11832d61bbb3SSatish Balay #endif 11842d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 11852d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 11862d61bbb3SSatish Balay if (roworiented) { 11875ef9f2a5SBarry Smith value = v[l + k*n]; 11882d61bbb3SSatish Balay } else { 11892d61bbb3SSatish Balay value = v[k + l*m]; 11902d61bbb3SSatish Balay } 11912d61bbb3SSatish Balay if (!sorted) low = 0; high = nrow; 11922d61bbb3SSatish Balay while (high-low > 7) { 11932d61bbb3SSatish Balay t = (low+high)/2; 11942d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 11952d61bbb3SSatish Balay else low = t; 11962d61bbb3SSatish Balay } 11972d61bbb3SSatish Balay for (i=low; i<high; i++) { 11982d61bbb3SSatish Balay if (rp[i] > bcol) break; 11992d61bbb3SSatish Balay if (rp[i] == bcol) { 12002d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 12012d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 12022d61bbb3SSatish Balay else *bap = value; 12032d61bbb3SSatish Balay goto noinsert1; 12042d61bbb3SSatish Balay } 12052d61bbb3SSatish Balay } 12062d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 120729bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12082d61bbb3SSatish Balay if (nrow >= rmax) { 12092d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 12102d61bbb3SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 12113f1db9ecSBarry Smith MatScalar *new_a; 12122d61bbb3SSatish Balay 121329bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12142d61bbb3SSatish Balay 12152d61bbb3SSatish Balay /* Malloc new storage space */ 12163f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1217b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 12182d61bbb3SSatish Balay new_j = (int*)(new_a + bs2*new_nz); 12192d61bbb3SSatish Balay new_i = new_j + new_nz; 12202d61bbb3SSatish Balay 12212d61bbb3SSatish Balay /* copy over old data into new slots */ 12222d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 12232d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1224549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 12252d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1226549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1227549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1228549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1229549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 12302d61bbb3SSatish Balay /* free up old matrix storage */ 1231606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1232606d414cSSatish Balay if (!a->singlemalloc) { 1233606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1234606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1235606d414cSSatish Balay } 12362d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1237c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 12382d61bbb3SSatish Balay 12392d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 12402d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1241b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 12422d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 12432d61bbb3SSatish Balay a->reallocs++; 12442d61bbb3SSatish Balay a->nz++; 12452d61bbb3SSatish Balay } 12462d61bbb3SSatish Balay N = nrow++ - 1; 12472d61bbb3SSatish Balay /* shift up all the later entries in this row */ 12482d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 12492d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1250549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 12512d61bbb3SSatish Balay } 1252549d3d68SSatish Balay if (N>=i) { 1253549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1254549d3d68SSatish Balay } 12552d61bbb3SSatish Balay rp[i] = bcol; 12562d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 12572d61bbb3SSatish Balay noinsert1:; 12582d61bbb3SSatish Balay low = i; 12592d61bbb3SSatish Balay } 12602d61bbb3SSatish Balay ailen[brow] = nrow; 12612d61bbb3SSatish Balay } 12622d61bbb3SSatish Balay PetscFunctionReturn(0); 12632d61bbb3SSatish Balay } 12642d61bbb3SSatish Balay 12652d61bbb3SSatish Balay 12664a2ae208SSatish Balay #undef __FUNCT__ 12674a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 12685ef9f2a5SBarry Smith int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 12692d61bbb3SSatish Balay { 12702d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 12712d61bbb3SSatish Balay Mat outA; 12722d61bbb3SSatish Balay int ierr; 1273667159a5SBarry Smith PetscTruth row_identity,col_identity; 12742d61bbb3SSatish Balay 12752d61bbb3SSatish Balay PetscFunctionBegin; 127629bbc08cSBarry Smith if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1277667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1278667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1279667159a5SBarry Smith if (!row_identity || !col_identity) { 128029bbc08cSBarry Smith SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1281667159a5SBarry Smith } 12822d61bbb3SSatish Balay 12832d61bbb3SSatish Balay outA = inA; 12842d61bbb3SSatish Balay inA->factor = FACTOR_LU; 12852d61bbb3SSatish Balay 12862d61bbb3SSatish Balay if (!a->diag) { 1287c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 12882d61bbb3SSatish Balay } 1289cf242676SKris Buschelman 1290c38d4ed2SBarry Smith a->row = row; 1291c38d4ed2SBarry Smith a->col = col; 1292c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1293c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1294c38d4ed2SBarry Smith 1295c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 12964c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1297b0a32e0cSBarry Smith PetscLogObjectParent(inA,a->icol); 1298c38d4ed2SBarry Smith 1299cf242676SKris Buschelman /* 1300cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1301cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1302cf242676SKris Buschelman */ 1303cf242676SKris Buschelman if (a->bs < 8) { 1304cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1305cf242676SKris Buschelman } else { 1306c38d4ed2SBarry Smith if (!a->solve_work) { 130787828ca2SBarry Smith ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 130887828ca2SBarry Smith PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1309c38d4ed2SBarry Smith } 13102d61bbb3SSatish Balay } 13112d61bbb3SSatish Balay 1312667159a5SBarry Smith ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1313667159a5SBarry Smith 13142d61bbb3SSatish Balay PetscFunctionReturn(0); 13152d61bbb3SSatish Balay } 13164a2ae208SSatish Balay #undef __FUNCT__ 13174a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1318ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1319ba4ca20aSSatish Balay { 1320c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1321ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1322d132466eSBarry Smith int ierr; 1323ba4ca20aSSatish Balay 13243a40ed3dSBarry Smith PetscFunctionBegin; 1325c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1326d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1327d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 13283a40ed3dSBarry Smith PetscFunctionReturn(0); 1329ba4ca20aSSatish Balay } 1330d9b7c43dSSatish Balay 1331fb2e594dSBarry Smith EXTERN_C_BEGIN 13324a2ae208SSatish Balay #undef __FUNCT__ 13334a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 133427a8da17SBarry Smith int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 133527a8da17SBarry Smith { 133627a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 133714db4f74SSatish Balay int i,nz,nbs; 133827a8da17SBarry Smith 133927a8da17SBarry Smith PetscFunctionBegin; 134014db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 134114db4f74SSatish Balay nbs = baij->nbs; 134227a8da17SBarry Smith for (i=0; i<nz; i++) { 134327a8da17SBarry Smith baij->j[i] = indices[i]; 134427a8da17SBarry Smith } 134527a8da17SBarry Smith baij->nz = nz; 134614db4f74SSatish Balay for (i=0; i<nbs; i++) { 134727a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 134827a8da17SBarry Smith } 134927a8da17SBarry Smith 135027a8da17SBarry Smith PetscFunctionReturn(0); 135127a8da17SBarry Smith } 1352fb2e594dSBarry Smith EXTERN_C_END 135327a8da17SBarry Smith 13544a2ae208SSatish Balay #undef __FUNCT__ 13554a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 135627a8da17SBarry Smith /*@ 135727a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 135827a8da17SBarry Smith in the matrix. 135927a8da17SBarry Smith 136027a8da17SBarry Smith Input Parameters: 136127a8da17SBarry Smith + mat - the SeqBAIJ matrix 136227a8da17SBarry Smith - indices - the column indices 136327a8da17SBarry Smith 136415091d37SBarry Smith Level: advanced 136515091d37SBarry Smith 136627a8da17SBarry Smith Notes: 136727a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 136827a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 136927a8da17SBarry Smith of the MatSetValues() operation. 137027a8da17SBarry Smith 137127a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 137227a8da17SBarry Smith MatCreateSeqBAIJ(). 137327a8da17SBarry Smith 137427a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 137527a8da17SBarry Smith 137627a8da17SBarry Smith @*/ 137727a8da17SBarry Smith int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 137827a8da17SBarry Smith { 137927a8da17SBarry Smith int ierr,(*f)(Mat,int *); 138027a8da17SBarry Smith 138127a8da17SBarry Smith PetscFunctionBegin; 138227a8da17SBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE); 1383c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 138427a8da17SBarry Smith if (f) { 138527a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 138627a8da17SBarry Smith } else { 138729bbc08cSBarry Smith SETERRQ(1,"Wrong type of matrix to set column indices"); 138827a8da17SBarry Smith } 138927a8da17SBarry Smith PetscFunctionReturn(0); 139027a8da17SBarry Smith } 139127a8da17SBarry Smith 13924a2ae208SSatish Balay #undef __FUNCT__ 13934a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1394273d9f13SBarry Smith int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1395273d9f13SBarry Smith { 1396273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1397273d9f13SBarry Smith int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1398273d9f13SBarry Smith PetscReal atmp; 139987828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1400273d9f13SBarry Smith MatScalar *aa; 1401273d9f13SBarry Smith int ncols,brow,krow,kcol; 1402273d9f13SBarry Smith 1403273d9f13SBarry Smith PetscFunctionBegin; 1404273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405273d9f13SBarry Smith bs = a->bs; 1406273d9f13SBarry Smith aa = a->a; 1407273d9f13SBarry Smith ai = a->i; 1408273d9f13SBarry Smith aj = a->j; 1409273d9f13SBarry Smith mbs = a->mbs; 1410273d9f13SBarry Smith 1411273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 1412273d9f13SBarry Smith ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1413273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1414273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1415273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1416273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1417273d9f13SBarry Smith brow = bs*i; 1418273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1419273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1420273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1421273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1422273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1423273d9f13SBarry Smith row = brow + krow; /* row index */ 1424273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1425273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1426273d9f13SBarry Smith } 1427273d9f13SBarry Smith } 1428273d9f13SBarry Smith aj++; 1429273d9f13SBarry Smith } 1430273d9f13SBarry Smith } 1431273d9f13SBarry Smith ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1432273d9f13SBarry Smith PetscFunctionReturn(0); 1433273d9f13SBarry Smith } 1434273d9f13SBarry Smith 14354a2ae208SSatish Balay #undef __FUNCT__ 14364a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1437273d9f13SBarry Smith int MatSetUpPreallocation_SeqBAIJ(Mat A) 1438273d9f13SBarry Smith { 1439273d9f13SBarry Smith int ierr; 1440273d9f13SBarry Smith 1441273d9f13SBarry Smith PetscFunctionBegin; 1442273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1443273d9f13SBarry Smith PetscFunctionReturn(0); 1444273d9f13SBarry Smith } 1445273d9f13SBarry Smith 14464a2ae208SSatish Balay #undef __FUNCT__ 14474a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 144887828ca2SBarry Smith int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1449f2a5309cSSatish Balay { 1450f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451f2a5309cSSatish Balay PetscFunctionBegin; 1452f2a5309cSSatish Balay *array = a->a; 1453f2a5309cSSatish Balay PetscFunctionReturn(0); 1454f2a5309cSSatish Balay } 1455f2a5309cSSatish Balay 14564a2ae208SSatish Balay #undef __FUNCT__ 14574a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 145887828ca2SBarry Smith int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1459f2a5309cSSatish Balay { 1460f2a5309cSSatish Balay PetscFunctionBegin; 1461f2a5309cSSatish Balay PetscFunctionReturn(0); 1462f2a5309cSSatish Balay } 1463f2a5309cSSatish Balay 146442ee4b1aSHong Zhang #include "petscblaslapack.h" 146542ee4b1aSHong Zhang #undef __FUNCT__ 146642ee4b1aSHong Zhang #define __FUNCT__ "MatAXPY_SeqBAIJ" 146742ee4b1aSHong Zhang int MatAXPY_SeqBAIJ(PetscScalar *a,Mat X,Mat Y,MatStructure str) 146842ee4b1aSHong Zhang { 146942ee4b1aSHong Zhang int ierr,one=1; 147042ee4b1aSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data; 147142ee4b1aSHong Zhang 147242ee4b1aSHong Zhang PetscFunctionBegin; 147342ee4b1aSHong Zhang if (str == SAME_NONZERO_PATTERN) { 147442ee4b1aSHong Zhang BLaxpy_(&x->nz,a,x->a,&one,y->a,&one); 147542ee4b1aSHong Zhang } else { 147642ee4b1aSHong Zhang ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr); 147742ee4b1aSHong Zhang } 147842ee4b1aSHong Zhang PetscFunctionReturn(0); 147942ee4b1aSHong Zhang } 148042ee4b1aSHong Zhang 14812593348eSBarry Smith /* -------------------------------------------------------------------*/ 1482cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1483cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 1484cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 1485cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 1486cc2dc46cSBarry Smith MatMultAdd_SeqBAIJ_N, 14877c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 14887c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 1489cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 1490cc2dc46cSBarry Smith 0, 1491cc2dc46cSBarry Smith 0, 1492cc2dc46cSBarry Smith 0, 1493cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 1494cc2dc46cSBarry Smith 0, 1495b6490206SBarry Smith 0, 1496f2501298SSatish Balay MatTranspose_SeqBAIJ, 1497cc2dc46cSBarry Smith MatGetInfo_SeqBAIJ, 1498cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 1499cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 1500cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 1501cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 1502b6490206SBarry Smith 0, 1503cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 1504cc2dc46cSBarry Smith 0, 1505cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 1506cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 1507cc2dc46cSBarry Smith MatZeroRows_SeqBAIJ, 1508cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 1509cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 1510cc2dc46cSBarry Smith 0, 1511cc2dc46cSBarry Smith 0, 1512273d9f13SBarry Smith MatSetUpPreallocation_SeqBAIJ, 1513cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 1514cc2dc46cSBarry Smith 0, 1515f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 1516f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 15172e8a6d31SBarry Smith MatDuplicate_SeqBAIJ, 1518cc2dc46cSBarry Smith 0, 1519cc2dc46cSBarry Smith 0, 1520cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 1521cc2dc46cSBarry Smith 0, 152242ee4b1aSHong Zhang MatAXPY_SeqBAIJ, 1523cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 1524cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 1525cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 1526cc2dc46cSBarry Smith 0, 1527cc2dc46cSBarry Smith MatPrintHelp_SeqBAIJ, 1528cc2dc46cSBarry Smith MatScale_SeqBAIJ, 1529cc2dc46cSBarry Smith 0, 1530cc2dc46cSBarry Smith 0, 1531cc2dc46cSBarry Smith 0, 1532cc2dc46cSBarry Smith MatGetBlockSize_SeqBAIJ, 15333b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 153492c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 1535cc2dc46cSBarry Smith 0, 1536cc2dc46cSBarry Smith 0, 1537cc2dc46cSBarry Smith 0, 1538cc2dc46cSBarry Smith 0, 1539cc2dc46cSBarry Smith 0, 1540cc2dc46cSBarry Smith 0, 1541d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 1542cc2dc46cSBarry Smith MatGetSubMatrix_SeqBAIJ, 1543b9b97703SBarry Smith MatDestroy_SeqBAIJ, 1544b9b97703SBarry Smith MatView_SeqBAIJ, 15453a64cc32SBarry Smith MatGetPetscMaps_Petsc, 1546273d9f13SBarry Smith 0, 1547273d9f13SBarry Smith 0, 1548273d9f13SBarry Smith 0, 1549273d9f13SBarry Smith 0, 1550273d9f13SBarry Smith 0, 1551273d9f13SBarry Smith 0, 1552273d9f13SBarry Smith MatGetRowMax_SeqBAIJ, 1553273d9f13SBarry Smith MatConvert_Basic}; 15542593348eSBarry Smith 15553e90b805SBarry Smith EXTERN_C_BEGIN 15564a2ae208SSatish Balay #undef __FUNCT__ 15574a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 15583e90b805SBarry Smith int MatStoreValues_SeqBAIJ(Mat mat) 15593e90b805SBarry Smith { 15603e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1561273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1562d132466eSBarry Smith int ierr; 15633e90b805SBarry Smith 15643e90b805SBarry Smith PetscFunctionBegin; 15653e90b805SBarry Smith if (aij->nonew != 1) { 156629bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15673e90b805SBarry Smith } 15683e90b805SBarry Smith 15693e90b805SBarry Smith /* allocate space for values if not already there */ 15703e90b805SBarry Smith if (!aij->saved_values) { 157187828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 15723e90b805SBarry Smith } 15733e90b805SBarry Smith 15743e90b805SBarry Smith /* copy values over */ 157587828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15763e90b805SBarry Smith PetscFunctionReturn(0); 15773e90b805SBarry Smith } 15783e90b805SBarry Smith EXTERN_C_END 15793e90b805SBarry Smith 15803e90b805SBarry Smith EXTERN_C_BEGIN 15814a2ae208SSatish Balay #undef __FUNCT__ 15824a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 158332e87ba3SBarry Smith int MatRetrieveValues_SeqBAIJ(Mat mat) 15843e90b805SBarry Smith { 15853e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1586273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 15873e90b805SBarry Smith 15883e90b805SBarry Smith PetscFunctionBegin; 15893e90b805SBarry Smith if (aij->nonew != 1) { 159029bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15913e90b805SBarry Smith } 15923e90b805SBarry Smith if (!aij->saved_values) { 159329bbc08cSBarry Smith SETERRQ(1,"Must call MatStoreValues(A);first"); 15943e90b805SBarry Smith } 15953e90b805SBarry Smith 15963e90b805SBarry Smith /* copy values over */ 159787828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15983e90b805SBarry Smith PetscFunctionReturn(0); 15993e90b805SBarry Smith } 16003e90b805SBarry Smith EXTERN_C_END 16013e90b805SBarry Smith 1602273d9f13SBarry Smith EXTERN_C_BEGIN 1603273d9f13SBarry Smith extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1604273d9f13SBarry Smith EXTERN_C_END 1605273d9f13SBarry Smith 1606273d9f13SBarry Smith EXTERN_C_BEGIN 16074a2ae208SSatish Balay #undef __FUNCT__ 16084a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 1609273d9f13SBarry Smith int MatCreate_SeqBAIJ(Mat B) 16102593348eSBarry Smith { 1611273d9f13SBarry Smith int ierr,size; 1612b6490206SBarry Smith Mat_SeqBAIJ *b; 16133b2fbd54SBarry Smith 16143a40ed3dSBarry Smith PetscFunctionBegin; 1615273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 161629bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1617b6490206SBarry Smith 1618273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 1619273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 1620b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1621b0a32e0cSBarry Smith B->data = (void*)b; 1622549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1623549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 16242593348eSBarry Smith B->factor = 0; 16252593348eSBarry Smith B->lupivotthreshold = 1.0; 162690f02eecSBarry Smith B->mapping = 0; 16272593348eSBarry Smith b->row = 0; 16282593348eSBarry Smith b->col = 0; 1629e51c0b9cSSatish Balay b->icol = 0; 16302593348eSBarry Smith b->reallocs = 0; 16313e90b805SBarry Smith b->saved_values = 0; 1632cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1633563b5814SBarry Smith b->setvalueslen = 0; 1634563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 1635563b5814SBarry Smith #endif 16368661488fSKris Buschelman b->single_precision_solves = PETSC_FALSE; 16372593348eSBarry Smith 16383a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 16393a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1640a5ae1ecdSBarry Smith 1641c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 1642c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 16432593348eSBarry Smith b->nonew = 0; 16442593348eSBarry Smith b->diag = 0; 16452593348eSBarry Smith b->solve_work = 0; 1646de6a44a3SBarry Smith b->mult_work = 0; 16472a1b7f2aSHong Zhang B->spptr = 0; 16480e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 1649883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 16504e220ebcSLois Curfman McInnes 1651f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 16523e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 1653bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1654f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 16553e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 1656bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1657f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 165827a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1659bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1660273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1661273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 1662273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 16633a40ed3dSBarry Smith PetscFunctionReturn(0); 16642593348eSBarry Smith } 1665273d9f13SBarry Smith EXTERN_C_END 16662593348eSBarry Smith 16674a2ae208SSatish Balay #undef __FUNCT__ 16684a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 16692e8a6d31SBarry Smith int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 16702593348eSBarry Smith { 16712593348eSBarry Smith Mat C; 1672b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 167327a8da17SBarry Smith int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1674de6a44a3SBarry Smith 16753a40ed3dSBarry Smith PetscFunctionBegin; 167629bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 16772593348eSBarry Smith 16782593348eSBarry Smith *B = 0; 1679273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1680273d9f13SBarry Smith ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1681273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 168244cd7ae7SLois Curfman McInnes 1683b6490206SBarry Smith c->bs = a->bs; 16849df24120SSatish Balay c->bs2 = a->bs2; 1685b6490206SBarry Smith c->mbs = a->mbs; 1686b6490206SBarry Smith c->nbs = a->nbs; 168735d8aa7fSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 16882593348eSBarry Smith 1689b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1690b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1691b6490206SBarry Smith for (i=0; i<mbs; i++) { 16922593348eSBarry Smith c->imax[i] = a->imax[i]; 16932593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16942593348eSBarry Smith } 16952593348eSBarry Smith 16962593348eSBarry Smith /* allocate the matrix space */ 1697c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 16983f1db9ecSBarry Smith len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1699b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 17007e67e3f9SSatish Balay c->j = (int*)(c->a + nz*bs2); 1701de6a44a3SBarry Smith c->i = c->j + nz; 1702549d3d68SSatish Balay ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1703b6490206SBarry Smith if (mbs > 0) { 1704549d3d68SSatish Balay ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 17052e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 1706549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 17072e8a6d31SBarry Smith } else { 1708549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 17092593348eSBarry Smith } 17102593348eSBarry Smith } 17112593348eSBarry Smith 1712b0a32e0cSBarry Smith PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 17132593348eSBarry Smith c->sorted = a->sorted; 17142593348eSBarry Smith c->roworiented = a->roworiented; 17152593348eSBarry Smith c->nonew = a->nonew; 17162593348eSBarry Smith 17172593348eSBarry Smith if (a->diag) { 1718b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1719b0a32e0cSBarry Smith PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1720b6490206SBarry Smith for (i=0; i<mbs; i++) { 17212593348eSBarry Smith c->diag[i] = a->diag[i]; 17222593348eSBarry Smith } 172398305bb5SBarry Smith } else c->diag = 0; 17242593348eSBarry Smith c->nz = a->nz; 17252593348eSBarry Smith c->maxnz = a->maxnz; 17262593348eSBarry Smith c->solve_work = 0; 17272a1b7f2aSHong Zhang C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17287fc0212eSBarry Smith c->mult_work = 0; 1729273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 1730273d9f13SBarry Smith C->assembled = PETSC_TRUE; 17312593348eSBarry Smith *B = C; 1732b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 17333a40ed3dSBarry Smith PetscFunctionReturn(0); 17342593348eSBarry Smith } 17352593348eSBarry Smith 1736273d9f13SBarry Smith EXTERN_C_BEGIN 17374a2ae208SSatish Balay #undef __FUNCT__ 17384a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 1739b0a32e0cSBarry Smith int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 17402593348eSBarry Smith { 1741b6490206SBarry Smith Mat_SeqBAIJ *a; 17422593348eSBarry Smith Mat B; 1743f1af5d2fSBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1744b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 174535aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1746a7c10996SSatish Balay int *masked,nmask,tmp,bs2,ishift; 174787828ca2SBarry Smith PetscScalar *aa; 174819bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 17492593348eSBarry Smith 17503a40ed3dSBarry Smith PetscFunctionBegin; 1751b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1752de6a44a3SBarry Smith bs2 = bs*bs; 1753b6490206SBarry Smith 1754d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 175529bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1756b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 17570752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1758552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 17592593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17602593348eSBarry Smith 1761d64ed03dSBarry Smith if (header[3] < 0) { 176229bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1763d64ed03dSBarry Smith } 1764d64ed03dSBarry Smith 176529bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 176635aab85fSBarry Smith 176735aab85fSBarry Smith /* 176835aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 176935aab85fSBarry Smith divisible by the blocksize 177035aab85fSBarry Smith */ 1771b6490206SBarry Smith mbs = M/bs; 177235aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 177335aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 177435aab85fSBarry Smith else mbs++; 177535aab85fSBarry Smith if (extra_rows) { 1776b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 177735aab85fSBarry Smith } 1778b6490206SBarry Smith 17792593348eSBarry Smith /* read in row lengths */ 1780b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 17810752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 178235aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 17832593348eSBarry Smith 1784b6490206SBarry Smith /* read in column indices */ 1785b0a32e0cSBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 17860752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 178735aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1788b6490206SBarry Smith 1789b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1790b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1791549d3d68SSatish Balay ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1792b0a32e0cSBarry Smith ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1793549d3d68SSatish Balay ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 179435aab85fSBarry Smith masked = mask + mbs; 1795b6490206SBarry Smith rowcount = 0; nzcount = 0; 1796b6490206SBarry Smith for (i=0; i<mbs; i++) { 179735aab85fSBarry Smith nmask = 0; 1798b6490206SBarry Smith for (j=0; j<bs; j++) { 1799b6490206SBarry Smith kmax = rowlengths[rowcount]; 1800b6490206SBarry Smith for (k=0; k<kmax; k++) { 180135aab85fSBarry Smith tmp = jj[nzcount++]/bs; 180235aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1803b6490206SBarry Smith } 1804b6490206SBarry Smith rowcount++; 1805b6490206SBarry Smith } 180635aab85fSBarry Smith browlengths[i] += nmask; 180735aab85fSBarry Smith /* zero out the mask elements we set */ 180835aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1809b6490206SBarry Smith } 1810b6490206SBarry Smith 18112593348eSBarry Smith /* create our matrix */ 18123eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 18132593348eSBarry Smith B = *A; 1814b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 18152593348eSBarry Smith 18162593348eSBarry Smith /* set matrix "i" values */ 1817de6a44a3SBarry Smith a->i[0] = 0; 1818b6490206SBarry Smith for (i=1; i<= mbs; i++) { 1819b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1820b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 18212593348eSBarry Smith } 1822b6490206SBarry Smith a->nz = 0; 1823b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 18242593348eSBarry Smith 1825b6490206SBarry Smith /* read in nonzero values */ 182687828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 18270752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 182835aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1829b6490206SBarry Smith 1830b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1831b6490206SBarry Smith nzcount = 0; jcount = 0; 1832b6490206SBarry Smith for (i=0; i<mbs; i++) { 1833b6490206SBarry Smith nzcountb = nzcount; 183435aab85fSBarry Smith nmask = 0; 1835b6490206SBarry Smith for (j=0; j<bs; j++) { 1836b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1837b6490206SBarry Smith for (k=0; k<kmax; k++) { 183835aab85fSBarry Smith tmp = jj[nzcount++]/bs; 183935aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1840b6490206SBarry Smith } 1841b6490206SBarry Smith } 1842de6a44a3SBarry Smith /* sort the masked values */ 1843433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1844de6a44a3SBarry Smith 1845b6490206SBarry Smith /* set "j" values into matrix */ 1846b6490206SBarry Smith maskcount = 1; 184735aab85fSBarry Smith for (j=0; j<nmask; j++) { 184835aab85fSBarry Smith a->j[jcount++] = masked[j]; 1849de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1850b6490206SBarry Smith } 1851b6490206SBarry Smith /* set "a" values into matrix */ 1852de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1853b6490206SBarry Smith for (j=0; j<bs; j++) { 1854b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1855b6490206SBarry Smith for (k=0; k<kmax; k++) { 1856de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1857de6a44a3SBarry Smith block = mask[tmp] - 1; 1858de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1859de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1860375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 1861b6490206SBarry Smith } 1862b6490206SBarry Smith } 186335aab85fSBarry Smith /* zero out the mask elements we set */ 186435aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1865b6490206SBarry Smith } 186629bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1867b6490206SBarry Smith 1868606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1869606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 1870606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1871606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 1872606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 1873b6490206SBarry Smith 1874b6490206SBarry Smith B->assembled = PETSC_TRUE; 1875de6a44a3SBarry Smith 18769c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 18773a40ed3dSBarry Smith PetscFunctionReturn(0); 18782593348eSBarry Smith } 1879273d9f13SBarry Smith EXTERN_C_END 18802593348eSBarry Smith 18814a2ae208SSatish Balay #undef __FUNCT__ 18824a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 1883273d9f13SBarry Smith /*@C 1884273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1885273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 1886273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1887273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1888273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 18892593348eSBarry Smith 1890273d9f13SBarry Smith Collective on MPI_Comm 1891273d9f13SBarry Smith 1892273d9f13SBarry Smith Input Parameters: 1893273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1894273d9f13SBarry Smith . bs - size of block 1895273d9f13SBarry Smith . m - number of rows 1896273d9f13SBarry Smith . n - number of columns 189735d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 189835d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 1899273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1900273d9f13SBarry Smith 1901273d9f13SBarry Smith Output Parameter: 1902273d9f13SBarry Smith . A - the matrix 1903273d9f13SBarry Smith 1904273d9f13SBarry Smith Options Database Keys: 1905273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1906273d9f13SBarry Smith block calculations (much slower) 1907273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1908273d9f13SBarry Smith 1909273d9f13SBarry Smith Level: intermediate 1910273d9f13SBarry Smith 1911273d9f13SBarry Smith Notes: 191235d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 191335d8aa7fSBarry Smith 1914273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1915273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1916273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1917273d9f13SBarry Smith 1918273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1919273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1920273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1921273d9f13SBarry Smith matrices. 1922273d9f13SBarry Smith 1923273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1924273d9f13SBarry Smith @*/ 1925273d9f13SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1926273d9f13SBarry Smith { 1927273d9f13SBarry Smith int ierr; 1928273d9f13SBarry Smith 1929273d9f13SBarry Smith PetscFunctionBegin; 1930273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1931273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1932273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1933273d9f13SBarry Smith PetscFunctionReturn(0); 1934273d9f13SBarry Smith } 1935273d9f13SBarry Smith 19364a2ae208SSatish Balay #undef __FUNCT__ 19374a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1938273d9f13SBarry Smith /*@C 1939273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1940273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 1941273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1942273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1943273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 1944273d9f13SBarry Smith 1945273d9f13SBarry Smith Collective on MPI_Comm 1946273d9f13SBarry Smith 1947273d9f13SBarry Smith Input Parameters: 1948273d9f13SBarry Smith + A - the matrix 1949273d9f13SBarry Smith . bs - size of block 1950273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1951273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 1952273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1953273d9f13SBarry Smith 1954273d9f13SBarry Smith Options Database Keys: 1955273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1956273d9f13SBarry Smith block calculations (much slower) 1957273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1958273d9f13SBarry Smith 1959273d9f13SBarry Smith Level: intermediate 1960273d9f13SBarry Smith 1961273d9f13SBarry Smith Notes: 1962273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1963273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1964273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1965273d9f13SBarry Smith 1966273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1967273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1968273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1969273d9f13SBarry Smith matrices. 1970273d9f13SBarry Smith 1971273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1972273d9f13SBarry Smith @*/ 1973273d9f13SBarry Smith int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1974273d9f13SBarry Smith { 1975273d9f13SBarry Smith Mat_SeqBAIJ *b; 1976273d9f13SBarry Smith int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1977273d9f13SBarry Smith PetscTruth flg; 1978273d9f13SBarry Smith 1979273d9f13SBarry Smith PetscFunctionBegin; 1980273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1981273d9f13SBarry Smith if (!flg) PetscFunctionReturn(0); 1982273d9f13SBarry Smith 1983273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 1984b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1985273d9f13SBarry Smith if (nnz && newbs != bs) { 1986273d9f13SBarry Smith SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1987273d9f13SBarry Smith } 1988273d9f13SBarry Smith bs = newbs; 1989273d9f13SBarry Smith 1990273d9f13SBarry Smith mbs = B->m/bs; 1991273d9f13SBarry Smith nbs = B->n/bs; 1992273d9f13SBarry Smith bs2 = bs*bs; 1993273d9f13SBarry Smith 1994273d9f13SBarry Smith if (mbs*bs!=B->m || nbs*bs!=B->n) { 199535d8aa7fSBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1996273d9f13SBarry Smith } 1997273d9f13SBarry Smith 1998435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1999435da068SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 2000273d9f13SBarry Smith if (nnz) { 2001273d9f13SBarry Smith for (i=0; i<mbs; i++) { 2002273d9f13SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 20033a7fca6bSBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 2004273d9f13SBarry Smith } 2005273d9f13SBarry Smith } 2006273d9f13SBarry Smith 2007273d9f13SBarry Smith b = (Mat_SeqBAIJ*)B->data; 2008b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 200954138f6bSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 201054138f6bSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 2011273d9f13SBarry Smith if (!flg) { 2012273d9f13SBarry Smith switch (bs) { 2013273d9f13SBarry Smith case 1: 2014273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 2015273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_1; 2016273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2017273d9f13SBarry Smith break; 2018273d9f13SBarry Smith case 2: 2019273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2020273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_2; 2021273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2022273d9f13SBarry Smith break; 2023273d9f13SBarry Smith case 3: 2024273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2025273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_3; 2026273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2027273d9f13SBarry Smith break; 2028273d9f13SBarry Smith case 4: 2029273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2030273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_4; 2031273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2032273d9f13SBarry Smith break; 2033273d9f13SBarry Smith case 5: 2034273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2035273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_5; 2036273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2037273d9f13SBarry Smith break; 2038273d9f13SBarry Smith case 6: 2039273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2040273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_6; 2041273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2042273d9f13SBarry Smith break; 2043273d9f13SBarry Smith case 7: 204454138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2045273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_7; 2046273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2047273d9f13SBarry Smith break; 2048273d9f13SBarry Smith default: 204954138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2050273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2051273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2052273d9f13SBarry Smith break; 2053273d9f13SBarry Smith } 2054273d9f13SBarry Smith } 2055273d9f13SBarry Smith b->bs = bs; 2056273d9f13SBarry Smith b->mbs = mbs; 2057273d9f13SBarry Smith b->nbs = nbs; 2058b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2059273d9f13SBarry Smith if (!nnz) { 2060435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2061273d9f13SBarry Smith else if (nz <= 0) nz = 1; 2062273d9f13SBarry Smith for (i=0; i<mbs; i++) b->imax[i] = nz; 2063273d9f13SBarry Smith nz = nz*mbs; 2064273d9f13SBarry Smith } else { 2065273d9f13SBarry Smith nz = 0; 2066273d9f13SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2067273d9f13SBarry Smith } 2068273d9f13SBarry Smith 2069273d9f13SBarry Smith /* allocate the matrix space */ 2070273d9f13SBarry Smith len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2071b0a32e0cSBarry Smith ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2072273d9f13SBarry Smith ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2073273d9f13SBarry Smith b->j = (int*)(b->a + nz*bs2); 2074273d9f13SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2075273d9f13SBarry Smith b->i = b->j + nz; 2076273d9f13SBarry Smith b->singlemalloc = PETSC_TRUE; 2077273d9f13SBarry Smith 2078273d9f13SBarry Smith b->i[0] = 0; 2079273d9f13SBarry Smith for (i=1; i<mbs+1; i++) { 2080273d9f13SBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 2081273d9f13SBarry Smith } 2082273d9f13SBarry Smith 2083273d9f13SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 208416d6aa21SSatish Balay ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2085b0a32e0cSBarry Smith PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2086273d9f13SBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2087273d9f13SBarry Smith 2088273d9f13SBarry Smith b->bs = bs; 2089273d9f13SBarry Smith b->bs2 = bs2; 2090273d9f13SBarry Smith b->mbs = mbs; 2091273d9f13SBarry Smith b->nz = 0; 2092273d9f13SBarry Smith b->maxnz = nz*bs2; 2093273d9f13SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2094273d9f13SBarry Smith PetscFunctionReturn(0); 2095273d9f13SBarry Smith } 20962593348eSBarry Smith 2097