1bba1ac68SSatish Balay /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/ 22593348eSBarry Smith 32593348eSBarry Smith /* 4b6490206SBarry Smith Defines the basic matrix operations for the BAIJ (compressed row) 52593348eSBarry Smith matrix storage format. 62593348eSBarry Smith */ 770f55243SBarry Smith #include "src/mat/impls/baij/seq/baij.h" 81a6a6d98SBarry Smith #include "src/vec/vecimpl.h" 91a6a6d98SBarry Smith #include "src/inline/spops.h" 10325e03aeSBarry Smith #include "petscsys.h" /*I "petscmat.h" I*/ 113b547af2SSatish Balay 12af674e45SBarry Smith /* 13af674e45SBarry Smith Special version for Fun3d sequential benchmark 14af674e45SBarry Smith */ 15af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 16af674e45SBarry Smith #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 17af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 18af674e45SBarry Smith #define matsetvaluesblocked4_ matsetvaluesblocked4 19af674e45SBarry Smith #endif 20af674e45SBarry Smith 21af674e45SBarry Smith #undef __FUNCT__ 22af674e45SBarry Smith #define __FUNCT__ "matsetvaluesblocked4_" 23*4bb09213Spetsc void matsetvaluesblocked4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 24af674e45SBarry Smith { 25af674e45SBarry Smith Mat A = *AA; 26af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 27a037b02bSBarry Smith int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 2838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 29a037b02bSBarry Smith int *aj=a->j,stepval; 30*4bb09213Spetsc PetscScalar *value = v; 31*4bb09213Spetsc MatScalar *ap,*aa = a->a,*bap; 32af674e45SBarry Smith 33af674e45SBarry Smith PetscFunctionBegin; 34af674e45SBarry Smith stepval = (n-1)*4; 35af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 36af674e45SBarry Smith row = im[k]; 37af674e45SBarry Smith rp = aj + ai[row]; 38af674e45SBarry Smith ap = aa + 16*ai[row]; 39af674e45SBarry Smith nrow = ailen[row]; 40af674e45SBarry Smith low = 0; 41af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 42af674e45SBarry Smith col = in[l]; 43af674e45SBarry Smith value = v + k*(stepval+4)*4 + l*4; 44af674e45SBarry Smith low = 0; high = nrow; 45af674e45SBarry Smith while (high-low > 7) { 46af674e45SBarry Smith t = (low+high)/2; 47af674e45SBarry Smith if (rp[t] > col) high = t; 48af674e45SBarry Smith else low = t; 49af674e45SBarry Smith } 50af674e45SBarry Smith for (i=low; i<high; i++) { 51af674e45SBarry Smith if (rp[i] > col) break; 52af674e45SBarry Smith if (rp[i] == col) { 53af674e45SBarry Smith bap = ap + 16*i; 54af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 55af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 56af674e45SBarry Smith bap[jj] += *value++; 57af674e45SBarry Smith } 58af674e45SBarry Smith } 59af674e45SBarry Smith goto noinsert2; 60af674e45SBarry Smith } 61af674e45SBarry Smith } 62af674e45SBarry Smith N = nrow++ - 1; 63af674e45SBarry Smith /* shift up all the later entries in this row */ 64af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 65af674e45SBarry Smith rp[ii+1] = rp[ii]; 66a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 67af674e45SBarry Smith } 68af674e45SBarry Smith if (N >= i) { 69a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 70af674e45SBarry Smith } 71af674e45SBarry Smith rp[i] = col; 72af674e45SBarry Smith bap = ap + 16*i; 73af674e45SBarry Smith for (ii=0; ii<4; ii++,value+=stepval) { 74af674e45SBarry Smith for (jj=ii; jj<16; jj+=4) { 75af674e45SBarry Smith bap[jj] = *value++; 76af674e45SBarry Smith } 77af674e45SBarry Smith } 78af674e45SBarry Smith noinsert2:; 79af674e45SBarry Smith low = i; 80af674e45SBarry Smith } 81af674e45SBarry Smith ailen[row] = nrow; 82af674e45SBarry Smith } 83af674e45SBarry Smith } 84af674e45SBarry Smith 85af674e45SBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 86af674e45SBarry Smith #define matsetvalues4_ MATSETVALUES4 87af674e45SBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 88af674e45SBarry Smith #define matsetvalues4_ matsetvalues4 89af674e45SBarry Smith #endif 90af674e45SBarry Smith 91af674e45SBarry Smith #undef __FUNCT__ 92af674e45SBarry Smith #define __FUNCT__ "MatSetValues4_" 93*4bb09213Spetsc void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v) 94af674e45SBarry Smith { 95af674e45SBarry Smith Mat A = *AA; 96af674e45SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 97a037b02bSBarry Smith int *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 9838fde467SHong Zhang int *ai=a->i,*ailen=a->ilen; 99af674e45SBarry Smith int *aj=a->j,brow,bcol; 10038fde467SHong Zhang int ridx,cidx; 101af674e45SBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 102af674e45SBarry Smith 103af674e45SBarry Smith PetscFunctionBegin; 104af674e45SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 105af674e45SBarry Smith row = im[k]; brow = row/4; 106af674e45SBarry Smith rp = aj + ai[brow]; 107af674e45SBarry Smith ap = aa + 16*ai[brow]; 108af674e45SBarry Smith nrow = ailen[brow]; 109af674e45SBarry Smith low = 0; 110af674e45SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 111af674e45SBarry Smith col = in[l]; bcol = col/4; 112af674e45SBarry Smith ridx = row % 4; cidx = col % 4; 113af674e45SBarry Smith value = v[l + k*n]; 114af674e45SBarry Smith low = 0; high = nrow; 115af674e45SBarry Smith while (high-low > 7) { 116af674e45SBarry Smith t = (low+high)/2; 117af674e45SBarry Smith if (rp[t] > bcol) high = t; 118af674e45SBarry Smith else low = t; 119af674e45SBarry Smith } 120af674e45SBarry Smith for (i=low; i<high; i++) { 121af674e45SBarry Smith if (rp[i] > bcol) break; 122af674e45SBarry Smith if (rp[i] == bcol) { 123af674e45SBarry Smith bap = ap + 16*i + 4*cidx + ridx; 124af674e45SBarry Smith *bap += value; 125af674e45SBarry Smith goto noinsert1; 126af674e45SBarry Smith } 127af674e45SBarry Smith } 128af674e45SBarry Smith N = nrow++ - 1; 129af674e45SBarry Smith /* shift up all the later entries in this row */ 130af674e45SBarry Smith for (ii=N; ii>=i; ii--) { 131af674e45SBarry Smith rp[ii+1] = rp[ii]; 132a037b02bSBarry Smith PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 133af674e45SBarry Smith } 134af674e45SBarry Smith if (N>=i) { 135a037b02bSBarry Smith PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 136af674e45SBarry Smith } 137af674e45SBarry Smith rp[i] = bcol; 138af674e45SBarry Smith ap[16*i + 4*cidx + ridx] = value; 139af674e45SBarry Smith noinsert1:; 140af674e45SBarry Smith low = i; 141af674e45SBarry Smith } 142af674e45SBarry Smith ailen[brow] = nrow; 143af674e45SBarry Smith } 144af674e45SBarry Smith } 145af674e45SBarry Smith 14695d5f7c2SBarry Smith /* UGLY, ugly, ugly 14787828ca2SBarry Smith When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does 1483477af42SKris Buschelman not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and 14995d5f7c2SBarry Smith inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ() 15095d5f7c2SBarry Smith converts the entries into single precision and then calls ..._MatScalar() to put them 15195d5f7c2SBarry Smith into the single precision data structures. 15295d5f7c2SBarry Smith */ 15395d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 154ca44d042SBarry Smith EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 15595d5f7c2SBarry Smith #else 15695d5f7c2SBarry Smith #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ 15795d5f7c2SBarry Smith #endif 15804929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 15904929863SHong Zhang EXTERN int MatUseDSCPACK_MPIBAIJ(Mat); 16004929863SHong Zhang #endif 16195d5f7c2SBarry Smith 1622d61bbb3SSatish Balay #define CHUNKSIZE 10 163de6a44a3SBarry Smith 164be5855fcSBarry Smith /* 165be5855fcSBarry Smith Checks for missing diagonals 166be5855fcSBarry Smith */ 1674a2ae208SSatish Balay #undef __FUNCT__ 1684a2ae208SSatish Balay #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 169c4992f7dSBarry Smith int MatMissingDiagonal_SeqBAIJ(Mat A) 170be5855fcSBarry Smith { 171be5855fcSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 172883fce79SBarry Smith int *diag,*jj = a->j,i,ierr; 173be5855fcSBarry Smith 174be5855fcSBarry Smith PetscFunctionBegin; 175c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 176883fce79SBarry Smith diag = a->diag; 1770e8e8aceSBarry Smith for (i=0; i<a->mbs; i++) { 178be5855fcSBarry Smith if (jj[diag[i]] != i) { 17929bbc08cSBarry Smith SETERRQ1(1,"Matrix is missing diagonal number %d",i); 180be5855fcSBarry Smith } 181be5855fcSBarry Smith } 182be5855fcSBarry Smith PetscFunctionReturn(0); 183be5855fcSBarry Smith } 184be5855fcSBarry Smith 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 187c4992f7dSBarry Smith int MatMarkDiagonal_SeqBAIJ(Mat A) 188de6a44a3SBarry Smith { 189de6a44a3SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 19082502324SSatish Balay int i,j,*diag,m = a->mbs,ierr; 191de6a44a3SBarry Smith 1923a40ed3dSBarry Smith PetscFunctionBegin; 193883fce79SBarry Smith if (a->diag) PetscFunctionReturn(0); 194883fce79SBarry Smith 195b0a32e0cSBarry Smith ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr); 196b0a32e0cSBarry Smith PetscLogObjectMemory(A,(m+1)*sizeof(int)); 1977fc0212eSBarry Smith for (i=0; i<m; i++) { 198ecc615b2SBarry Smith diag[i] = a->i[i+1]; 199de6a44a3SBarry Smith for (j=a->i[i]; j<a->i[i+1]; j++) { 200de6a44a3SBarry Smith if (a->j[j] == i) { 201de6a44a3SBarry Smith diag[i] = j; 202de6a44a3SBarry Smith break; 203de6a44a3SBarry Smith } 204de6a44a3SBarry Smith } 205de6a44a3SBarry Smith } 206de6a44a3SBarry Smith a->diag = diag; 2073a40ed3dSBarry Smith PetscFunctionReturn(0); 208de6a44a3SBarry Smith } 2092593348eSBarry Smith 2102593348eSBarry Smith 211ca44d042SBarry Smith EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**); 2123b2fbd54SBarry Smith 2134a2ae208SSatish Balay #undef __FUNCT__ 2144a2ae208SSatish Balay #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 2150e6d2581SBarry Smith static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2163b2fbd54SBarry Smith { 2173b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2183b2fbd54SBarry Smith int ierr,n = a->mbs,i; 2193b2fbd54SBarry Smith 2203a40ed3dSBarry Smith PetscFunctionBegin; 2213b2fbd54SBarry Smith *nn = n; 2223a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2233b2fbd54SBarry Smith if (symmetric) { 2243b2fbd54SBarry Smith ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr); 2253b2fbd54SBarry Smith } else if (oshift == 1) { 2263b2fbd54SBarry Smith /* temporarily add 1 to i and j indices */ 227435da068SBarry Smith int nz = a->i[n]; 2283b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]++; 2293b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]++; 2303b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2313b2fbd54SBarry Smith } else { 2323b2fbd54SBarry Smith *ia = a->i; *ja = a->j; 2333b2fbd54SBarry Smith } 2343b2fbd54SBarry Smith 2353a40ed3dSBarry Smith PetscFunctionReturn(0); 2363b2fbd54SBarry Smith } 2373b2fbd54SBarry Smith 2384a2ae208SSatish Balay #undef __FUNCT__ 2394a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 240435da068SBarry Smith static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int **ia,int **ja,PetscTruth *done) 2413b2fbd54SBarry Smith { 2423b2fbd54SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 243606d414cSSatish Balay int i,n = a->mbs,ierr; 2443b2fbd54SBarry Smith 2453a40ed3dSBarry Smith PetscFunctionBegin; 2463a40ed3dSBarry Smith if (!ia) PetscFunctionReturn(0); 2473b2fbd54SBarry Smith if (symmetric) { 248606d414cSSatish Balay ierr = PetscFree(*ia);CHKERRQ(ierr); 249606d414cSSatish Balay ierr = PetscFree(*ja);CHKERRQ(ierr); 250af5da2bfSBarry Smith } else if (oshift == 1) { 251435da068SBarry Smith int nz = a->i[n]-1; 2523b2fbd54SBarry Smith for (i=0; i<nz; i++) a->j[i]--; 2533b2fbd54SBarry Smith for (i=0; i<n+1; i++) a->i[i]--; 2543b2fbd54SBarry Smith } 2553a40ed3dSBarry Smith PetscFunctionReturn(0); 2563b2fbd54SBarry Smith } 2573b2fbd54SBarry Smith 2584a2ae208SSatish Balay #undef __FUNCT__ 2594a2ae208SSatish Balay #define __FUNCT__ "MatGetBlockSize_SeqBAIJ" 2602d61bbb3SSatish Balay int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs) 2612d61bbb3SSatish Balay { 2622d61bbb3SSatish Balay Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2632d61bbb3SSatish Balay 2642d61bbb3SSatish Balay PetscFunctionBegin; 2652d61bbb3SSatish Balay *bs = baij->bs; 2662d61bbb3SSatish Balay PetscFunctionReturn(0); 2672d61bbb3SSatish Balay } 2682d61bbb3SSatish Balay 2692d61bbb3SSatish Balay 2704a2ae208SSatish Balay #undef __FUNCT__ 2714a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_SeqBAIJ" 272e1311b90SBarry Smith int MatDestroy_SeqBAIJ(Mat A) 2732d61bbb3SSatish Balay { 2742d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 275e51c0b9cSSatish Balay int ierr; 2762d61bbb3SSatish Balay 277433994e6SBarry Smith PetscFunctionBegin; 278aa482453SBarry Smith #if defined(PETSC_USE_LOG) 279b0a32e0cSBarry Smith PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz); 2802d61bbb3SSatish Balay #endif 281606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 282606d414cSSatish Balay if (!a->singlemalloc) { 283606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 284606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 285606d414cSSatish Balay } 286c38d4ed2SBarry Smith if (a->row) { 287c38d4ed2SBarry Smith ierr = ISDestroy(a->row);CHKERRQ(ierr); 288c38d4ed2SBarry Smith } 289c38d4ed2SBarry Smith if (a->col) { 290c38d4ed2SBarry Smith ierr = ISDestroy(a->col);CHKERRQ(ierr); 291c38d4ed2SBarry Smith } 292606d414cSSatish Balay if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 293606d414cSSatish Balay if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);} 294606d414cSSatish Balay if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);} 295606d414cSSatish Balay if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);} 296606d414cSSatish Balay if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);} 297e51c0b9cSSatish Balay if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);} 298606d414cSSatish Balay if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);} 299563b5814SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 300563b5814SBarry Smith if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);} 301563b5814SBarry Smith #endif 302606d414cSSatish Balay ierr = PetscFree(a);CHKERRQ(ierr); 3032d61bbb3SSatish Balay PetscFunctionReturn(0); 3042d61bbb3SSatish Balay } 3052d61bbb3SSatish Balay 3064a2ae208SSatish Balay #undef __FUNCT__ 3074a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_SeqBAIJ" 3082d61bbb3SSatish Balay int MatSetOption_SeqBAIJ(Mat A,MatOption op) 3092d61bbb3SSatish Balay { 3102d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 3112d61bbb3SSatish Balay 3122d61bbb3SSatish Balay PetscFunctionBegin; 313aa275fccSKris Buschelman switch (op) { 314aa275fccSKris Buschelman case MAT_ROW_ORIENTED: 315aa275fccSKris Buschelman a->roworiented = PETSC_TRUE; 316aa275fccSKris Buschelman break; 317aa275fccSKris Buschelman case MAT_COLUMN_ORIENTED: 318aa275fccSKris Buschelman a->roworiented = PETSC_FALSE; 319aa275fccSKris Buschelman break; 320aa275fccSKris Buschelman case MAT_COLUMNS_SORTED: 321aa275fccSKris Buschelman a->sorted = PETSC_TRUE; 322aa275fccSKris Buschelman break; 323aa275fccSKris Buschelman case MAT_COLUMNS_UNSORTED: 324aa275fccSKris Buschelman a->sorted = PETSC_FALSE; 325aa275fccSKris Buschelman break; 326aa275fccSKris Buschelman case MAT_KEEP_ZEROED_ROWS: 327aa275fccSKris Buschelman a->keepzeroedrows = PETSC_TRUE; 328aa275fccSKris Buschelman break; 329aa275fccSKris Buschelman case MAT_NO_NEW_NONZERO_LOCATIONS: 330aa275fccSKris Buschelman a->nonew = 1; 331aa275fccSKris Buschelman break; 332aa275fccSKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 333aa275fccSKris Buschelman a->nonew = -1; 334aa275fccSKris Buschelman break; 335aa275fccSKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 336aa275fccSKris Buschelman a->nonew = -2; 337aa275fccSKris Buschelman break; 338aa275fccSKris Buschelman case MAT_YES_NEW_NONZERO_LOCATIONS: 339aa275fccSKris Buschelman a->nonew = 0; 340aa275fccSKris Buschelman break; 341aa275fccSKris Buschelman case MAT_ROWS_SORTED: 342aa275fccSKris Buschelman case MAT_ROWS_UNSORTED: 343aa275fccSKris Buschelman case MAT_YES_NEW_DIAGONALS: 344aa275fccSKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 345aa275fccSKris Buschelman case MAT_USE_HASH_TABLE: 346b0a32e0cSBarry Smith PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n"); 347aa275fccSKris Buschelman break; 348aa275fccSKris Buschelman case MAT_NO_NEW_DIAGONALS: 34929bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS"); 350bd648c37SKris Buschelman case MAT_USE_SINGLE_PRECISION_SOLVES: 351bd648c37SKris Buschelman if (a->bs==4) { 35294ee7fc8SKris Buschelman PetscTruth sse_enabled_local,sse_enabled_global; 353bd648c37SKris Buschelman int ierr; 35494ee7fc8SKris Buschelman 35594ee7fc8SKris Buschelman sse_enabled_local = PETSC_FALSE; 35694ee7fc8SKris Buschelman sse_enabled_global = PETSC_FALSE; 35794ee7fc8SKris Buschelman 35894ee7fc8SKris Buschelman ierr = PetscSSEIsEnabled(A->comm,&sse_enabled_local,&sse_enabled_global);CHKERRQ(ierr); 359e64df4b9SKris Buschelman #if defined(PETSC_HAVE_SSE) 36094ee7fc8SKris Buschelman if (sse_enabled_local) { 36154138f6bSKris Buschelman a->single_precision_solves = PETSC_TRUE; 36254138f6bSKris Buschelman A->ops->solve = MatSolve_SeqBAIJ_Update; 36354138f6bSKris Buschelman A->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 364cf242676SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES set\n"); 36554138f6bSKris Buschelman break; 36694ee7fc8SKris Buschelman } else { 36794ee7fc8SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 3688661488fSKris Buschelman } 369e64df4b9SKris Buschelman #else 370bd648c37SKris Buschelman PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option MAT_USE_SINGLE_PRECISION_SOLVES ignored\n"); 371e64df4b9SKris Buschelman #endif 372bd648c37SKris Buschelman } 373bd648c37SKris Buschelman break; 374aa275fccSKris Buschelman default: 37529bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"unknown option"); 3762d61bbb3SSatish Balay } 3772d61bbb3SSatish Balay PetscFunctionReturn(0); 3782d61bbb3SSatish Balay } 3792d61bbb3SSatish Balay 3804a2ae208SSatish Balay #undef __FUNCT__ 3814a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_SeqBAIJ" 38287828ca2SBarry Smith int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 3832d61bbb3SSatish Balay { 3842d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 38582502324SSatish Balay int itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr; 3863f1db9ecSBarry Smith MatScalar *aa,*aa_i; 38787828ca2SBarry Smith PetscScalar *v_i; 3882d61bbb3SSatish Balay 3892d61bbb3SSatish Balay PetscFunctionBegin; 3902d61bbb3SSatish Balay bs = a->bs; 3912d61bbb3SSatish Balay ai = a->i; 3922d61bbb3SSatish Balay aj = a->j; 3932d61bbb3SSatish Balay aa = a->a; 3942d61bbb3SSatish Balay bs2 = a->bs2; 3952d61bbb3SSatish Balay 396273d9f13SBarry Smith if (row < 0 || row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row out of range"); 3972d61bbb3SSatish Balay 3982d61bbb3SSatish Balay bn = row/bs; /* Block number */ 3992d61bbb3SSatish Balay bp = row % bs; /* Block Position */ 4002d61bbb3SSatish Balay M = ai[bn+1] - ai[bn]; 4012d61bbb3SSatish Balay *nz = bs*M; 4022d61bbb3SSatish Balay 4032d61bbb3SSatish Balay if (v) { 4042d61bbb3SSatish Balay *v = 0; 4052d61bbb3SSatish Balay if (*nz) { 40687828ca2SBarry Smith ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr); 4072d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4082d61bbb3SSatish Balay v_i = *v + i*bs; 4092d61bbb3SSatish Balay aa_i = aa + bs2*(ai[bn] + i); 4102d61bbb3SSatish Balay for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];} 4112d61bbb3SSatish Balay } 4122d61bbb3SSatish Balay } 4132d61bbb3SSatish Balay } 4142d61bbb3SSatish Balay 4152d61bbb3SSatish Balay if (idx) { 4162d61bbb3SSatish Balay *idx = 0; 4172d61bbb3SSatish Balay if (*nz) { 418b0a32e0cSBarry Smith ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr); 4192d61bbb3SSatish Balay for (i=0; i<M; i++) { /* for each block in the block row */ 4202d61bbb3SSatish Balay idx_i = *idx + i*bs; 4212d61bbb3SSatish Balay itmp = bs*aj[ai[bn] + i]; 4222d61bbb3SSatish Balay for (j=0; j<bs; j++) {idx_i[j] = itmp++;} 4232d61bbb3SSatish Balay } 4242d61bbb3SSatish Balay } 4252d61bbb3SSatish Balay } 4262d61bbb3SSatish Balay PetscFunctionReturn(0); 4272d61bbb3SSatish Balay } 4282d61bbb3SSatish Balay 4294a2ae208SSatish Balay #undef __FUNCT__ 4304a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 43187828ca2SBarry Smith int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v) 4322d61bbb3SSatish Balay { 433606d414cSSatish Balay int ierr; 434606d414cSSatish Balay 4352d61bbb3SSatish Balay PetscFunctionBegin; 436606d414cSSatish Balay if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}} 437606d414cSSatish Balay if (v) {if (*v) {ierr = PetscFree(*v);CHKERRQ(ierr);}} 4382d61bbb3SSatish Balay PetscFunctionReturn(0); 4392d61bbb3SSatish Balay } 4402d61bbb3SSatish Balay 4414a2ae208SSatish Balay #undef __FUNCT__ 4424a2ae208SSatish Balay #define __FUNCT__ "MatTranspose_SeqBAIJ" 4432d61bbb3SSatish Balay int MatTranspose_SeqBAIJ(Mat A,Mat *B) 4442d61bbb3SSatish Balay { 4452d61bbb3SSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data; 4462d61bbb3SSatish Balay Mat C; 4472d61bbb3SSatish Balay int i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 448273d9f13SBarry Smith int *rows,*cols,bs2=a->bs2; 44987828ca2SBarry Smith PetscScalar *array; 4502d61bbb3SSatish Balay 4512d61bbb3SSatish Balay PetscFunctionBegin; 45229bbc08cSBarry Smith if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place"); 453b0a32e0cSBarry Smith ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr); 454549d3d68SSatish Balay ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr); 4552d61bbb3SSatish Balay 456375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 45787828ca2SBarry Smith ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr); 45887828ca2SBarry Smith for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i]; 459375fe846SBarry Smith #else 4603eda8832SBarry Smith array = a->a; 461375fe846SBarry Smith #endif 462375fe846SBarry Smith 4632d61bbb3SSatish Balay for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 464273d9f13SBarry Smith ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr); 465606d414cSSatish Balay ierr = PetscFree(col);CHKERRQ(ierr); 466b0a32e0cSBarry Smith ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr); 4672d61bbb3SSatish Balay cols = rows + bs; 4682d61bbb3SSatish Balay for (i=0; i<mbs; i++) { 4692d61bbb3SSatish Balay cols[0] = i*bs; 4702d61bbb3SSatish Balay for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 4712d61bbb3SSatish Balay len = ai[i+1] - ai[i]; 4722d61bbb3SSatish Balay for (j=0; j<len; j++) { 4732d61bbb3SSatish Balay rows[0] = (*aj++)*bs; 4742d61bbb3SSatish Balay for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 4752d61bbb3SSatish Balay ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 4762d61bbb3SSatish Balay array += bs2; 4772d61bbb3SSatish Balay } 4782d61bbb3SSatish Balay } 479606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 480375fe846SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 481375fe846SBarry Smith ierr = PetscFree(array); 482375fe846SBarry Smith #endif 4832d61bbb3SSatish Balay 4842d61bbb3SSatish Balay ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4852d61bbb3SSatish Balay ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4862d61bbb3SSatish Balay 487c4992f7dSBarry Smith if (B) { 4882d61bbb3SSatish Balay *B = C; 4892d61bbb3SSatish Balay } else { 490273d9f13SBarry Smith ierr = MatHeaderCopy(A,C);CHKERRQ(ierr); 4912d61bbb3SSatish Balay } 4922d61bbb3SSatish Balay PetscFunctionReturn(0); 4932d61bbb3SSatish Balay } 4942d61bbb3SSatish Balay 4954a2ae208SSatish Balay #undef __FUNCT__ 4964a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Binary" 497b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 4982593348eSBarry Smith { 499b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 5009df24120SSatish Balay int i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2; 50187828ca2SBarry Smith PetscScalar *aa; 502ce6f0cecSBarry Smith FILE *file; 5032593348eSBarry Smith 5043a40ed3dSBarry Smith PetscFunctionBegin; 505b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 506b0a32e0cSBarry Smith ierr = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr); 507552e946dSBarry Smith col_lens[0] = MAT_FILE_COOKIE; 5083b2fbd54SBarry Smith 509273d9f13SBarry Smith col_lens[1] = A->m; 510273d9f13SBarry Smith col_lens[2] = A->n; 5117e67e3f9SSatish Balay col_lens[3] = a->nz*bs2; 5122593348eSBarry Smith 5132593348eSBarry Smith /* store lengths of each row and write (including header) to file */ 514b6490206SBarry Smith count = 0; 515b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 516b6490206SBarry Smith for (j=0; j<bs; j++) { 517b6490206SBarry Smith col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 518b6490206SBarry Smith } 5192593348eSBarry Smith } 520273d9f13SBarry Smith ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr); 521606d414cSSatish Balay ierr = PetscFree(col_lens);CHKERRQ(ierr); 5222593348eSBarry Smith 5232593348eSBarry Smith /* store column indices (zero start index) */ 524b0a32e0cSBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr); 525b6490206SBarry Smith count = 0; 526b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 527b6490206SBarry Smith for (j=0; j<bs; j++) { 528b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 529b6490206SBarry Smith for (l=0; l<bs; l++) { 530b6490206SBarry Smith jj[count++] = bs*a->j[k] + l; 5312593348eSBarry Smith } 5322593348eSBarry Smith } 533b6490206SBarry Smith } 534b6490206SBarry Smith } 5350752156aSBarry Smith ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr); 536606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 5372593348eSBarry Smith 5382593348eSBarry Smith /* store nonzero values */ 53987828ca2SBarry Smith ierr = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 540b6490206SBarry Smith count = 0; 541b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 542b6490206SBarry Smith for (j=0; j<bs; j++) { 543b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 544b6490206SBarry Smith for (l=0; l<bs; l++) { 5457e67e3f9SSatish Balay aa[count++] = a->a[bs2*k + l*bs + j]; 546b6490206SBarry Smith } 547b6490206SBarry Smith } 548b6490206SBarry Smith } 549b6490206SBarry Smith } 5500752156aSBarry Smith ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr); 551606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 552ce6f0cecSBarry Smith 553b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 554ce6f0cecSBarry Smith if (file) { 555ce6f0cecSBarry Smith fprintf(file,"-matload_block_size %d\n",a->bs); 556ce6f0cecSBarry Smith } 5573a40ed3dSBarry Smith PetscFunctionReturn(0); 5582593348eSBarry Smith } 5592593348eSBarry Smith 56004929863SHong Zhang extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer); 56104929863SHong Zhang 5624a2ae208SSatish Balay #undef __FUNCT__ 5634a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 564b0a32e0cSBarry Smith static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 5652593348eSBarry Smith { 566b6490206SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 567fb9695e5SSatish Balay int ierr,i,j,bs = a->bs,k,l,bs2=a->bs2; 568f3ef73ceSBarry Smith PetscViewerFormat format; 5692593348eSBarry Smith 5703a40ed3dSBarry Smith PetscFunctionBegin; 571b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 572fb9695e5SSatish Balay if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_LONG) { 573b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 574fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 575bcd9e38bSBarry Smith Mat aij; 576bcd9e38bSBarry Smith ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr); 577bcd9e38bSBarry Smith ierr = MatView(aij,viewer);CHKERRQ(ierr); 578bcd9e38bSBarry Smith ierr = MatDestroy(aij);CHKERRQ(ierr); 57904929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 58004929863SHong Zhang #if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX) 58104929863SHong Zhang ierr = MatMPIBAIJFactorInfo_DSCPACK(A,viewer);CHKERRQ(ierr); 58204929863SHong Zhang #endif 58304929863SHong Zhang PetscFunctionReturn(0); 584fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_COMMON) { 585b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 58644cd7ae7SLois Curfman McInnes for (i=0; i<a->mbs; i++) { 58744cd7ae7SLois Curfman McInnes for (j=0; j<bs; j++) { 588b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 58944cd7ae7SLois Curfman McInnes for (k=a->i[i]; k<a->i[i+1]; k++) { 59044cd7ae7SLois Curfman McInnes for (l=0; l<bs; l++) { 591aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 5920e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l, 5940e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5950e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59662b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l, 5970e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 5980e6d2581SBarry Smith } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 59962b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6000ef38995SBarry Smith } 60144cd7ae7SLois Curfman McInnes #else 6020ef38995SBarry Smith if (a->a[bs2*k + l*bs + j] != 0.0) { 60362b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 6040ef38995SBarry Smith } 60544cd7ae7SLois Curfman McInnes #endif 60644cd7ae7SLois Curfman McInnes } 60744cd7ae7SLois Curfman McInnes } 608b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 60944cd7ae7SLois Curfman McInnes } 61044cd7ae7SLois Curfman McInnes } 611b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 6120ef38995SBarry Smith } else { 613b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr); 614b6490206SBarry Smith for (i=0; i<a->mbs; i++) { 615b6490206SBarry Smith for (j=0; j<bs; j++) { 616b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr); 617b6490206SBarry Smith for (k=a->i[i]; k<a->i[i+1]; k++) { 618b6490206SBarry Smith for (l=0; l<bs; l++) { 619aa482453SBarry Smith #if defined(PETSC_USE_COMPLEX) 6200e6d2581SBarry Smith if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 62162b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l, 6220e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6230e6d2581SBarry Smith } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 62462b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l, 6250e6d2581SBarry Smith PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 6260ef38995SBarry Smith } else { 62762b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 62888685aaeSLois Curfman McInnes } 62988685aaeSLois Curfman McInnes #else 63062b941d6SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 63188685aaeSLois Curfman McInnes #endif 6322593348eSBarry Smith } 6332593348eSBarry Smith } 634b0a32e0cSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 6352593348eSBarry Smith } 6362593348eSBarry Smith } 637b0a32e0cSBarry Smith ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr); 638b6490206SBarry Smith } 639b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 6403a40ed3dSBarry Smith PetscFunctionReturn(0); 6412593348eSBarry Smith } 6422593348eSBarry Smith 6434a2ae208SSatish Balay #undef __FUNCT__ 6444a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 645b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 6463270192aSSatish Balay { 64777ed5343SBarry Smith Mat A = (Mat) Aa; 6483270192aSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 649b65db4caSBarry Smith int row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2; 6500e6d2581SBarry Smith PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 6513f1db9ecSBarry Smith MatScalar *aa; 652b0a32e0cSBarry Smith PetscViewer viewer; 6533270192aSSatish Balay 6543a40ed3dSBarry Smith PetscFunctionBegin; 6553270192aSSatish Balay 656b65db4caSBarry Smith /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/ 65777ed5343SBarry Smith ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 65877ed5343SBarry Smith 659b0a32e0cSBarry Smith ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 66077ed5343SBarry Smith 6613270192aSSatish Balay /* loop over matrix elements drawing boxes */ 662b0a32e0cSBarry Smith color = PETSC_DRAW_BLUE; 6633270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6643270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 665273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6663270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6673270192aSSatish Balay aa = a->a + j*bs2; 6683270192aSSatish Balay for (k=0; k<bs; k++) { 6693270192aSSatish Balay for (l=0; l<bs; l++) { 6700e6d2581SBarry Smith if (PetscRealPart(*aa++) >= 0.) continue; 671b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6723270192aSSatish Balay } 6733270192aSSatish Balay } 6743270192aSSatish Balay } 6753270192aSSatish Balay } 676b0a32e0cSBarry Smith color = PETSC_DRAW_CYAN; 6773270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6783270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 679273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6803270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6813270192aSSatish Balay aa = a->a + j*bs2; 6823270192aSSatish Balay for (k=0; k<bs; k++) { 6833270192aSSatish Balay for (l=0; l<bs; l++) { 6840e6d2581SBarry Smith if (PetscRealPart(*aa++) != 0.) continue; 685b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 6863270192aSSatish Balay } 6873270192aSSatish Balay } 6883270192aSSatish Balay } 6893270192aSSatish Balay } 6903270192aSSatish Balay 691b0a32e0cSBarry Smith color = PETSC_DRAW_RED; 6923270192aSSatish Balay for (i=0,row=0; i<mbs; i++,row+=bs) { 6933270192aSSatish Balay for (j=a->i[i]; j<a->i[i+1]; j++) { 694273d9f13SBarry Smith y_l = A->m - row - 1.0; y_r = y_l + 1.0; 6953270192aSSatish Balay x_l = a->j[j]*bs; x_r = x_l + 1.0; 6963270192aSSatish Balay aa = a->a + j*bs2; 6973270192aSSatish Balay for (k=0; k<bs; k++) { 6983270192aSSatish Balay for (l=0; l<bs; l++) { 6990e6d2581SBarry Smith if (PetscRealPart(*aa++) <= 0.) continue; 700b0a32e0cSBarry Smith ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 7013270192aSSatish Balay } 7023270192aSSatish Balay } 7033270192aSSatish Balay } 7043270192aSSatish Balay } 70577ed5343SBarry Smith PetscFunctionReturn(0); 70677ed5343SBarry Smith } 7073270192aSSatish Balay 7084a2ae208SSatish Balay #undef __FUNCT__ 7094a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ_Draw" 710b0a32e0cSBarry Smith static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 71177ed5343SBarry Smith { 7127dce120fSSatish Balay int ierr; 7130e6d2581SBarry Smith PetscReal xl,yl,xr,yr,w,h; 714b0a32e0cSBarry Smith PetscDraw draw; 71577ed5343SBarry Smith PetscTruth isnull; 7163270192aSSatish Balay 71777ed5343SBarry Smith PetscFunctionBegin; 71877ed5343SBarry Smith 719b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 720b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 72177ed5343SBarry Smith 72277ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 723273d9f13SBarry Smith xr = A->n; yr = A->m; h = yr/10.0; w = xr/10.0; 72477ed5343SBarry Smith xr += w; yr += h; xl = -w; yl = -h; 725b0a32e0cSBarry Smith ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 726b0a32e0cSBarry Smith ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 72777ed5343SBarry Smith ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr); 7283a40ed3dSBarry Smith PetscFunctionReturn(0); 7293270192aSSatish Balay } 7303270192aSSatish Balay 7314a2ae208SSatish Balay #undef __FUNCT__ 7324a2ae208SSatish Balay #define __FUNCT__ "MatView_SeqBAIJ" 733b0a32e0cSBarry Smith int MatView_SeqBAIJ(Mat A,PetscViewer viewer) 7342593348eSBarry Smith { 73519bcc07fSBarry Smith int ierr; 7366831982aSBarry Smith PetscTruth issocket,isascii,isbinary,isdraw; 7372593348eSBarry Smith 7383a40ed3dSBarry Smith PetscFunctionBegin; 739b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr); 740b0a32e0cSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 741fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr); 742fb9695e5SSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr); 7430f5bd95cSBarry Smith if (issocket) { 74429bbc08cSBarry Smith SETERRQ(PETSC_ERR_SUP,"Socket viewer not supported"); 7450f5bd95cSBarry Smith } else if (isascii){ 7463a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 7470f5bd95cSBarry Smith } else if (isbinary) { 7483a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 7490f5bd95cSBarry Smith } else if (isdraw) { 7503a40ed3dSBarry Smith ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 7515cd90555SBarry Smith } else { 75229bbc08cSBarry Smith SETERRQ1(1,"Viewer type %s not supported by SeqBAIJ matrices",((PetscObject)viewer)->type_name); 7532593348eSBarry Smith } 7543a40ed3dSBarry Smith PetscFunctionReturn(0); 7552593348eSBarry Smith } 756b6490206SBarry Smith 757cd0e1443SSatish Balay 7584a2ae208SSatish Balay #undef __FUNCT__ 7594a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_SeqBAIJ" 76087828ca2SBarry Smith int MatGetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v) 761cd0e1443SSatish Balay { 762cd0e1443SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 7632d61bbb3SSatish Balay int *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 7642d61bbb3SSatish Balay int *ai = a->i,*ailen = a->ilen; 7652d61bbb3SSatish Balay int brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2; 7663f1db9ecSBarry Smith MatScalar *ap,*aa = a->a,zero = 0.0; 767cd0e1443SSatish Balay 7683a40ed3dSBarry Smith PetscFunctionBegin; 7692d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over rows */ 770cd0e1443SSatish Balay row = im[k]; brow = row/bs; 77129bbc08cSBarry Smith if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 772273d9f13SBarry Smith if (row >= A->m) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 7732d61bbb3SSatish Balay rp = aj + ai[brow] ; ap = aa + bs2*ai[brow] ; 7742c3acbe9SBarry Smith nrow = ailen[brow]; 7752d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over columns */ 77629bbc08cSBarry Smith if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); 777273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 7782d61bbb3SSatish Balay col = in[l] ; 7792d61bbb3SSatish Balay bcol = col/bs; 7802d61bbb3SSatish Balay cidx = col%bs; 7812d61bbb3SSatish Balay ridx = row%bs; 7822d61bbb3SSatish Balay high = nrow; 7832d61bbb3SSatish Balay low = 0; /* assume unsorted */ 7842d61bbb3SSatish Balay while (high-low > 5) { 785cd0e1443SSatish Balay t = (low+high)/2; 786cd0e1443SSatish Balay if (rp[t] > bcol) high = t; 787cd0e1443SSatish Balay else low = t; 788cd0e1443SSatish Balay } 789cd0e1443SSatish Balay for (i=low; i<high; i++) { 790cd0e1443SSatish Balay if (rp[i] > bcol) break; 791cd0e1443SSatish Balay if (rp[i] == bcol) { 7922d61bbb3SSatish Balay *v++ = ap[bs2*i+bs*cidx+ridx]; 7932d61bbb3SSatish Balay goto finished; 794cd0e1443SSatish Balay } 795cd0e1443SSatish Balay } 7962d61bbb3SSatish Balay *v++ = zero; 7972d61bbb3SSatish Balay finished:; 798cd0e1443SSatish Balay } 799cd0e1443SSatish Balay } 8003a40ed3dSBarry Smith PetscFunctionReturn(0); 801cd0e1443SSatish Balay } 802cd0e1443SSatish Balay 80395d5f7c2SBarry Smith #if defined(PETSC_USE_MAT_SINGLE) 8044a2ae208SSatish Balay #undef __FUNCT__ 8054a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 80687828ca2SBarry Smith int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv) 80795d5f7c2SBarry Smith { 808563b5814SBarry Smith Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data; 809563b5814SBarry Smith int ierr,i,N = m*n*b->bs2; 810563b5814SBarry Smith MatScalar *vsingle; 81195d5f7c2SBarry Smith 81295d5f7c2SBarry Smith PetscFunctionBegin; 813563b5814SBarry Smith if (N > b->setvalueslen) { 814563b5814SBarry Smith if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 815b0a32e0cSBarry Smith ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr); 816563b5814SBarry Smith b->setvalueslen = N; 817563b5814SBarry Smith } 818563b5814SBarry Smith vsingle = b->setvaluescopy; 81995d5f7c2SBarry Smith for (i=0; i<N; i++) { 82095d5f7c2SBarry Smith vsingle[i] = v[i]; 82195d5f7c2SBarry Smith } 82295d5f7c2SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 82395d5f7c2SBarry Smith PetscFunctionReturn(0); 82495d5f7c2SBarry Smith } 82595d5f7c2SBarry Smith #endif 82695d5f7c2SBarry Smith 8272d61bbb3SSatish Balay 8284a2ae208SSatish Balay #undef __FUNCT__ 8294a2ae208SSatish Balay #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 83095d5f7c2SBarry Smith int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,int *im,int n,int *in,MatScalar *v,InsertMode is) 83192c4ed94SBarry Smith { 83292c4ed94SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 8338a84c255SSatish Balay int *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 834273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 835549d3d68SSatish Balay int *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr; 836273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 83795d5f7c2SBarry Smith MatScalar *value = v,*ap,*aa = a->a,*bap; 83892c4ed94SBarry Smith 8393a40ed3dSBarry Smith PetscFunctionBegin; 8400e324ae4SSatish Balay if (roworiented) { 8410e324ae4SSatish Balay stepval = (n-1)*bs; 8420e324ae4SSatish Balay } else { 8430e324ae4SSatish Balay stepval = (m-1)*bs; 8440e324ae4SSatish Balay } 84592c4ed94SBarry Smith for (k=0; k<m; k++) { /* loop over added rows */ 84692c4ed94SBarry Smith row = im[k]; 8475ef9f2a5SBarry Smith if (row < 0) continue; 848aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 84929bbc08cSBarry Smith if (row >= a->mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 85092c4ed94SBarry Smith #endif 85192c4ed94SBarry Smith rp = aj + ai[row]; 85292c4ed94SBarry Smith ap = aa + bs2*ai[row]; 85392c4ed94SBarry Smith rmax = imax[row]; 85492c4ed94SBarry Smith nrow = ailen[row]; 85592c4ed94SBarry Smith low = 0; 85692c4ed94SBarry Smith for (l=0; l<n; l++) { /* loop over added columns */ 8575ef9f2a5SBarry Smith if (in[l] < 0) continue; 858aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 85929bbc08cSBarry Smith if (in[l] >= a->nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 86092c4ed94SBarry Smith #endif 86192c4ed94SBarry Smith col = in[l]; 86292c4ed94SBarry Smith if (roworiented) { 8630e324ae4SSatish Balay value = v + k*(stepval+bs)*bs + l*bs; 8640e324ae4SSatish Balay } else { 8650e324ae4SSatish Balay value = v + l*(stepval+bs)*bs + k*bs; 86692c4ed94SBarry Smith } 86792c4ed94SBarry Smith if (!sorted) low = 0; high = nrow; 86892c4ed94SBarry Smith while (high-low > 7) { 86992c4ed94SBarry Smith t = (low+high)/2; 87092c4ed94SBarry Smith if (rp[t] > col) high = t; 87192c4ed94SBarry Smith else low = t; 87292c4ed94SBarry Smith } 87392c4ed94SBarry Smith for (i=low; i<high; i++) { 87492c4ed94SBarry Smith if (rp[i] > col) break; 87592c4ed94SBarry Smith if (rp[i] == col) { 8768a84c255SSatish Balay bap = ap + bs2*i; 8770e324ae4SSatish Balay if (roworiented) { 8788a84c255SSatish Balay if (is == ADD_VALUES) { 879dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 880dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8818a84c255SSatish Balay bap[jj] += *value++; 882dd9472c6SBarry Smith } 883dd9472c6SBarry Smith } 8840e324ae4SSatish Balay } else { 885dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 886dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 8870e324ae4SSatish Balay bap[jj] = *value++; 8888a84c255SSatish Balay } 889dd9472c6SBarry Smith } 890dd9472c6SBarry Smith } 8910e324ae4SSatish Balay } else { 8920e324ae4SSatish Balay if (is == ADD_VALUES) { 893dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 894dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 8950e324ae4SSatish Balay *bap++ += *value++; 896dd9472c6SBarry Smith } 897dd9472c6SBarry Smith } 8980e324ae4SSatish Balay } else { 899dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 900dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9010e324ae4SSatish Balay *bap++ = *value++; 9020e324ae4SSatish Balay } 9038a84c255SSatish Balay } 904dd9472c6SBarry Smith } 905dd9472c6SBarry Smith } 906f1241b54SBarry Smith goto noinsert2; 90792c4ed94SBarry Smith } 90892c4ed94SBarry Smith } 90989280ab3SLois Curfman McInnes if (nonew == 1) goto noinsert2; 91029bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91192c4ed94SBarry Smith if (nrow >= rmax) { 91292c4ed94SBarry Smith /* there is no extra room in row, therefore enlarge */ 91392c4ed94SBarry Smith int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 9143f1db9ecSBarry Smith MatScalar *new_a; 91592c4ed94SBarry Smith 91629bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 91789280ab3SLois Curfman McInnes 91892c4ed94SBarry Smith /* malloc new storage space */ 9193f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 920b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 92192c4ed94SBarry Smith new_j = (int*)(new_a + bs2*new_nz); 92292c4ed94SBarry Smith new_i = new_j + new_nz; 92392c4ed94SBarry Smith 92492c4ed94SBarry Smith /* copy over old data into new slots */ 92592c4ed94SBarry Smith for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];} 92692c4ed94SBarry Smith for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 927549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr); 92892c4ed94SBarry Smith len = (new_nz - CHUNKSIZE - ai[row] - nrow); 929549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr); 930549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 931549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 932549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 93392c4ed94SBarry Smith /* free up old matrix storage */ 934606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 935606d414cSSatish Balay if (!a->singlemalloc) { 936606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 937606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 938606d414cSSatish Balay } 93992c4ed94SBarry Smith aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 940c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 94192c4ed94SBarry Smith 94292c4ed94SBarry Smith rp = aj + ai[row]; ap = aa + bs2*ai[row]; 94392c4ed94SBarry Smith rmax = imax[row] = imax[row] + CHUNKSIZE; 944b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 94592c4ed94SBarry Smith a->maxnz += bs2*CHUNKSIZE; 94692c4ed94SBarry Smith a->reallocs++; 94792c4ed94SBarry Smith a->nz++; 94892c4ed94SBarry Smith } 94992c4ed94SBarry Smith N = nrow++ - 1; 95092c4ed94SBarry Smith /* shift up all the later entries in this row */ 95192c4ed94SBarry Smith for (ii=N; ii>=i; ii--) { 95292c4ed94SBarry Smith rp[ii+1] = rp[ii]; 953549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 95492c4ed94SBarry Smith } 955549d3d68SSatish Balay if (N >= i) { 956549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 957549d3d68SSatish Balay } 95892c4ed94SBarry Smith rp[i] = col; 9598a84c255SSatish Balay bap = ap + bs2*i; 9600e324ae4SSatish Balay if (roworiented) { 961dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 962dd9472c6SBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 9630e324ae4SSatish Balay bap[jj] = *value++; 964dd9472c6SBarry Smith } 965dd9472c6SBarry Smith } 9660e324ae4SSatish Balay } else { 967dd9472c6SBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 968dd9472c6SBarry Smith for (jj=0; jj<bs; jj++) { 9690e324ae4SSatish Balay *bap++ = *value++; 9700e324ae4SSatish Balay } 971dd9472c6SBarry Smith } 972dd9472c6SBarry Smith } 973f1241b54SBarry Smith noinsert2:; 97492c4ed94SBarry Smith low = i; 97592c4ed94SBarry Smith } 97692c4ed94SBarry Smith ailen[row] = nrow; 97792c4ed94SBarry Smith } 9783a40ed3dSBarry Smith PetscFunctionReturn(0); 97992c4ed94SBarry Smith } 98092c4ed94SBarry Smith 9814a2ae208SSatish Balay #undef __FUNCT__ 9824a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 9838f6be9afSLois Curfman McInnes int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 984584200bdSSatish Balay { 985584200bdSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 986584200bdSSatish Balay int fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 987273d9f13SBarry Smith int m = A->m,*ip,N,*ailen = a->ilen; 988549d3d68SSatish Balay int mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr; 9893f1db9ecSBarry Smith MatScalar *aa = a->a,*ap; 990a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 991a56a16c8SHong Zhang PetscTruth flag; 992a56a16c8SHong Zhang #endif 993584200bdSSatish Balay 9943a40ed3dSBarry Smith PetscFunctionBegin; 9953a40ed3dSBarry Smith if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 996584200bdSSatish Balay 99743ee02c3SBarry Smith if (m) rmax = ailen[0]; 998584200bdSSatish Balay for (i=1; i<mbs; i++) { 999584200bdSSatish Balay /* move each row back by the amount of empty slots (fshift) before it*/ 1000584200bdSSatish Balay fshift += imax[i-1] - ailen[i-1]; 1001d402145bSBarry Smith rmax = PetscMax(rmax,ailen[i]); 1002584200bdSSatish Balay if (fshift) { 1003a7c10996SSatish Balay ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1004584200bdSSatish Balay N = ailen[i]; 1005584200bdSSatish Balay for (j=0; j<N; j++) { 1006584200bdSSatish Balay ip[j-fshift] = ip[j]; 1007549d3d68SSatish Balay ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1008584200bdSSatish Balay } 1009584200bdSSatish Balay } 1010584200bdSSatish Balay ai[i] = ai[i-1] + ailen[i-1]; 1011584200bdSSatish Balay } 1012584200bdSSatish Balay if (mbs) { 1013584200bdSSatish Balay fshift += imax[mbs-1] - ailen[mbs-1]; 1014584200bdSSatish Balay ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1015584200bdSSatish Balay } 1016584200bdSSatish Balay /* reset ilen and imax for each row */ 1017584200bdSSatish Balay for (i=0; i<mbs; i++) { 1018584200bdSSatish Balay ailen[i] = imax[i] = ai[i+1] - ai[i]; 1019584200bdSSatish Balay } 1020a7c10996SSatish Balay a->nz = ai[mbs]; 1021584200bdSSatish Balay 1022584200bdSSatish Balay /* diagonals may have moved, so kill the diagonal pointers */ 1023584200bdSSatish Balay if (fshift && a->diag) { 1024606d414cSSatish Balay ierr = PetscFree(a->diag);CHKERRQ(ierr); 1025b424e231SHong Zhang PetscLogObjectMemory(A,-(mbs+1)*sizeof(int)); 1026584200bdSSatish Balay a->diag = 0; 1027584200bdSSatish Balay } 1028bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2); 1029bba1ac68SSatish Balay PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs); 1030b0a32e0cSBarry Smith PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax); 1031e2f3b5e9SSatish Balay a->reallocs = 0; 10320e6d2581SBarry Smith A->info.nz_unneeded = (PetscReal)fshift*bs2; 1033cf4441caSHong Zhang 1034a56a16c8SHong Zhang #if defined(PETSC_HAVE_DSCPACK) 1035a56a16c8SHong Zhang ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr); 1036a56a16c8SHong Zhang if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); } 1037a56a16c8SHong Zhang #endif 1038a56a16c8SHong Zhang 10393a40ed3dSBarry Smith PetscFunctionReturn(0); 1040584200bdSSatish Balay } 1041584200bdSSatish Balay 10425a838052SSatish Balay 1043bea157c4SSatish Balay 1044bea157c4SSatish Balay /* 1045bea157c4SSatish Balay This function returns an array of flags which indicate the locations of contiguous 1046bea157c4SSatish Balay blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1047bea157c4SSatish Balay then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1048bea157c4SSatish Balay Assume: sizes should be long enough to hold all the values. 1049bea157c4SSatish Balay */ 10504a2ae208SSatish Balay #undef __FUNCT__ 10514a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1052bea157c4SSatish Balay static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max) 1053d9b7c43dSSatish Balay { 1054bea157c4SSatish Balay int i,j,k,row; 1055bea157c4SSatish Balay PetscTruth flg; 10563a40ed3dSBarry Smith 1057433994e6SBarry Smith PetscFunctionBegin; 1058bea157c4SSatish Balay for (i=0,j=0; i<n; j++) { 1059bea157c4SSatish Balay row = idx[i]; 1060bea157c4SSatish Balay if (row%bs!=0) { /* Not the begining of a block */ 1061bea157c4SSatish Balay sizes[j] = 1; 1062bea157c4SSatish Balay i++; 1063e4fda26cSSatish Balay } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1064bea157c4SSatish Balay sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1065bea157c4SSatish Balay i++; 1066bea157c4SSatish Balay } else { /* Begining of the block, so check if the complete block exists */ 1067bea157c4SSatish Balay flg = PETSC_TRUE; 1068bea157c4SSatish Balay for (k=1; k<bs; k++) { 1069bea157c4SSatish Balay if (row+k != idx[i+k]) { /* break in the block */ 1070bea157c4SSatish Balay flg = PETSC_FALSE; 1071bea157c4SSatish Balay break; 1072d9b7c43dSSatish Balay } 1073bea157c4SSatish Balay } 1074bea157c4SSatish Balay if (flg == PETSC_TRUE) { /* No break in the bs */ 1075bea157c4SSatish Balay sizes[j] = bs; 1076bea157c4SSatish Balay i+= bs; 1077bea157c4SSatish Balay } else { 1078bea157c4SSatish Balay sizes[j] = 1; 1079bea157c4SSatish Balay i++; 1080bea157c4SSatish Balay } 1081bea157c4SSatish Balay } 1082bea157c4SSatish Balay } 1083bea157c4SSatish Balay *bs_max = j; 10843a40ed3dSBarry Smith PetscFunctionReturn(0); 1085d9b7c43dSSatish Balay } 1086d9b7c43dSSatish Balay 10874a2ae208SSatish Balay #undef __FUNCT__ 10884a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_SeqBAIJ" 108987828ca2SBarry Smith int MatZeroRows_SeqBAIJ(Mat A,IS is,PetscScalar *diag) 1090d9b7c43dSSatish Balay { 1091d9b7c43dSSatish Balay Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 1092b6815fffSBarry Smith int ierr,i,j,k,count,is_n,*is_idx,*rows; 1093bea157c4SSatish Balay int bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max; 109487828ca2SBarry Smith PetscScalar zero = 0.0; 10953f1db9ecSBarry Smith MatScalar *aa; 1096d9b7c43dSSatish Balay 10973a40ed3dSBarry Smith PetscFunctionBegin; 1098d9b7c43dSSatish Balay /* Make a copy of the IS and sort it */ 1099b9b97703SBarry Smith ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr); 1100d9b7c43dSSatish Balay ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr); 1101d9b7c43dSSatish Balay 1102bea157c4SSatish Balay /* allocate memory for rows,sizes */ 1103b0a32e0cSBarry Smith ierr = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr); 1104bea157c4SSatish Balay sizes = rows + is_n; 1105bea157c4SSatish Balay 1106563b5814SBarry Smith /* copy IS values to rows, and sort them */ 1107bea157c4SSatish Balay for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; } 1108bea157c4SSatish Balay ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 1109dffd3267SBarry Smith if (baij->keepzeroedrows) { 1110dffd3267SBarry Smith for (i=0; i<is_n; i++) { sizes[i] = 1; } 1111dffd3267SBarry Smith bs_max = is_n; 1112dffd3267SBarry Smith } else { 1113bea157c4SSatish Balay ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 1114dffd3267SBarry Smith } 1115b97a56abSSatish Balay ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr); 1116bea157c4SSatish Balay 1117bea157c4SSatish Balay for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 1118bea157c4SSatish Balay row = rows[j]; 1119273d9f13SBarry Smith if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row); 1120bea157c4SSatish Balay count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 1121bea157c4SSatish Balay aa = baij->a + baij->i[row/bs]*bs2 + (row%bs); 1122dffd3267SBarry Smith if (sizes[i] == bs && !baij->keepzeroedrows) { 1123bea157c4SSatish Balay if (diag) { 1124bea157c4SSatish Balay if (baij->ilen[row/bs] > 0) { 1125bea157c4SSatish Balay baij->ilen[row/bs] = 1; 1126bea157c4SSatish Balay baij->j[baij->i[row/bs]] = row/bs; 1127bea157c4SSatish Balay ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 1128a07cd24cSSatish Balay } 1129563b5814SBarry Smith /* Now insert all the diagonal values for this bs */ 1130bea157c4SSatish Balay for (k=0; k<bs; k++) { 1131bea157c4SSatish Balay ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr); 1132bea157c4SSatish Balay } 1133bea157c4SSatish Balay } else { /* (!diag) */ 1134bea157c4SSatish Balay baij->ilen[row/bs] = 0; 1135bea157c4SSatish Balay } /* end (!diag) */ 1136bea157c4SSatish Balay } else { /* (sizes[i] != bs) */ 1137aa482453SBarry Smith #if defined (PETSC_USE_DEBUG) 113829bbc08cSBarry Smith if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1"); 1139bea157c4SSatish Balay #endif 1140bea157c4SSatish Balay for (k=0; k<count; k++) { 1141d9b7c43dSSatish Balay aa[0] = zero; 1142d9b7c43dSSatish Balay aa += bs; 1143d9b7c43dSSatish Balay } 1144d9b7c43dSSatish Balay if (diag) { 1145f830108cSBarry Smith ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr); 1146d9b7c43dSSatish Balay } 1147d9b7c43dSSatish Balay } 1148bea157c4SSatish Balay } 1149bea157c4SSatish Balay 1150606d414cSSatish Balay ierr = PetscFree(rows);CHKERRQ(ierr); 11519a8dea36SBarry Smith ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 11523a40ed3dSBarry Smith PetscFunctionReturn(0); 1153d9b7c43dSSatish Balay } 11541c351548SSatish Balay 11554a2ae208SSatish Balay #undef __FUNCT__ 11564a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_SeqBAIJ" 115787828ca2SBarry Smith int MatSetValues_SeqBAIJ(Mat A,int m,int *im,int n,int *in,PetscScalar *v,InsertMode is) 11582d61bbb3SSatish Balay { 11592d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 11602d61bbb3SSatish Balay int *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted; 1161273d9f13SBarry Smith int *imax=a->imax,*ai=a->i,*ailen=a->ilen; 11622d61bbb3SSatish Balay int *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol; 1163549d3d68SSatish Balay int ridx,cidx,bs2=a->bs2,ierr; 1164273d9f13SBarry Smith PetscTruth roworiented=a->roworiented; 11653f1db9ecSBarry Smith MatScalar *ap,value,*aa=a->a,*bap; 11662d61bbb3SSatish Balay 11672d61bbb3SSatish Balay PetscFunctionBegin; 11682d61bbb3SSatish Balay for (k=0; k<m; k++) { /* loop over added rows */ 11692d61bbb3SSatish Balay row = im[k]; brow = row/bs; 11705ef9f2a5SBarry Smith if (row < 0) continue; 1171aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1172273d9f13SBarry Smith if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m); 11732d61bbb3SSatish Balay #endif 11742d61bbb3SSatish Balay rp = aj + ai[brow]; 11752d61bbb3SSatish Balay ap = aa + bs2*ai[brow]; 11762d61bbb3SSatish Balay rmax = imax[brow]; 11772d61bbb3SSatish Balay nrow = ailen[brow]; 11782d61bbb3SSatish Balay low = 0; 11792d61bbb3SSatish Balay for (l=0; l<n; l++) { /* loop over added columns */ 11805ef9f2a5SBarry Smith if (in[l] < 0) continue; 1181aa482453SBarry Smith #if defined(PETSC_USE_BOPT_g) 1182273d9f13SBarry Smith if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n); 11832d61bbb3SSatish Balay #endif 11842d61bbb3SSatish Balay col = in[l]; bcol = col/bs; 11852d61bbb3SSatish Balay ridx = row % bs; cidx = col % bs; 11862d61bbb3SSatish Balay if (roworiented) { 11875ef9f2a5SBarry Smith value = v[l + k*n]; 11882d61bbb3SSatish Balay } else { 11892d61bbb3SSatish Balay value = v[k + l*m]; 11902d61bbb3SSatish Balay } 11912d61bbb3SSatish Balay if (!sorted) low = 0; high = nrow; 11922d61bbb3SSatish Balay while (high-low > 7) { 11932d61bbb3SSatish Balay t = (low+high)/2; 11942d61bbb3SSatish Balay if (rp[t] > bcol) high = t; 11952d61bbb3SSatish Balay else low = t; 11962d61bbb3SSatish Balay } 11972d61bbb3SSatish Balay for (i=low; i<high; i++) { 11982d61bbb3SSatish Balay if (rp[i] > bcol) break; 11992d61bbb3SSatish Balay if (rp[i] == bcol) { 12002d61bbb3SSatish Balay bap = ap + bs2*i + bs*cidx + ridx; 12012d61bbb3SSatish Balay if (is == ADD_VALUES) *bap += value; 12022d61bbb3SSatish Balay else *bap = value; 12032d61bbb3SSatish Balay goto noinsert1; 12042d61bbb3SSatish Balay } 12052d61bbb3SSatish Balay } 12062d61bbb3SSatish Balay if (nonew == 1) goto noinsert1; 120729bbc08cSBarry Smith else if (nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12082d61bbb3SSatish Balay if (nrow >= rmax) { 12092d61bbb3SSatish Balay /* there is no extra room in row, therefore enlarge */ 12102d61bbb3SSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; 12113f1db9ecSBarry Smith MatScalar *new_a; 12122d61bbb3SSatish Balay 121329bbc08cSBarry Smith if (nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); 12142d61bbb3SSatish Balay 12152d61bbb3SSatish Balay /* Malloc new storage space */ 12163f1db9ecSBarry Smith len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); 1217b0a32e0cSBarry Smith ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); 12182d61bbb3SSatish Balay new_j = (int*)(new_a + bs2*new_nz); 12192d61bbb3SSatish Balay new_i = new_j + new_nz; 12202d61bbb3SSatish Balay 12212d61bbb3SSatish Balay /* copy over old data into new slots */ 12222d61bbb3SSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} 12232d61bbb3SSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} 1224549d3d68SSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); 12252d61bbb3SSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); 1226549d3d68SSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); 1227549d3d68SSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); 1228549d3d68SSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); 1229549d3d68SSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); 12302d61bbb3SSatish Balay /* free up old matrix storage */ 1231606d414cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); 1232606d414cSSatish Balay if (!a->singlemalloc) { 1233606d414cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); 1234606d414cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr); 1235606d414cSSatish Balay } 12362d61bbb3SSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; 1237c4992f7dSBarry Smith a->singlemalloc = PETSC_TRUE; 12382d61bbb3SSatish Balay 12392d61bbb3SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 12402d61bbb3SSatish Balay rmax = imax[brow] = imax[brow] + CHUNKSIZE; 1241b0a32e0cSBarry Smith PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); 12422d61bbb3SSatish Balay a->maxnz += bs2*CHUNKSIZE; 12432d61bbb3SSatish Balay a->reallocs++; 12442d61bbb3SSatish Balay a->nz++; 12452d61bbb3SSatish Balay } 12462d61bbb3SSatish Balay N = nrow++ - 1; 12472d61bbb3SSatish Balay /* shift up all the later entries in this row */ 12482d61bbb3SSatish Balay for (ii=N; ii>=i; ii--) { 12492d61bbb3SSatish Balay rp[ii+1] = rp[ii]; 1250549d3d68SSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 12512d61bbb3SSatish Balay } 1252549d3d68SSatish Balay if (N>=i) { 1253549d3d68SSatish Balay ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1254549d3d68SSatish Balay } 12552d61bbb3SSatish Balay rp[i] = bcol; 12562d61bbb3SSatish Balay ap[bs2*i + bs*cidx + ridx] = value; 12572d61bbb3SSatish Balay noinsert1:; 12582d61bbb3SSatish Balay low = i; 12592d61bbb3SSatish Balay } 12602d61bbb3SSatish Balay ailen[brow] = nrow; 12612d61bbb3SSatish Balay } 12622d61bbb3SSatish Balay PetscFunctionReturn(0); 12632d61bbb3SSatish Balay } 12642d61bbb3SSatish Balay 12652d61bbb3SSatish Balay 12664a2ae208SSatish Balay #undef __FUNCT__ 12674a2ae208SSatish Balay #define __FUNCT__ "MatILUFactor_SeqBAIJ" 12685ef9f2a5SBarry Smith int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatILUInfo *info) 12692d61bbb3SSatish Balay { 12702d61bbb3SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 12712d61bbb3SSatish Balay Mat outA; 12722d61bbb3SSatish Balay int ierr; 1273667159a5SBarry Smith PetscTruth row_identity,col_identity; 12742d61bbb3SSatish Balay 12752d61bbb3SSatish Balay PetscFunctionBegin; 127629bbc08cSBarry Smith if (info && info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 1277667159a5SBarry Smith ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 1278667159a5SBarry Smith ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 1279667159a5SBarry Smith if (!row_identity || !col_identity) { 128029bbc08cSBarry Smith SETERRQ(1,"Row and column permutations must be identity for in-place ILU"); 1281667159a5SBarry Smith } 12822d61bbb3SSatish Balay 12832d61bbb3SSatish Balay outA = inA; 12842d61bbb3SSatish Balay inA->factor = FACTOR_LU; 12852d61bbb3SSatish Balay 12862d61bbb3SSatish Balay if (!a->diag) { 1287c4992f7dSBarry Smith ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 12882d61bbb3SSatish Balay } 1289cf242676SKris Buschelman 1290c38d4ed2SBarry Smith a->row = row; 1291c38d4ed2SBarry Smith a->col = col; 1292c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 1293c38d4ed2SBarry Smith ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 1294c38d4ed2SBarry Smith 1295c38d4ed2SBarry Smith /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 12964c49b128SBarry Smith ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 1297b0a32e0cSBarry Smith PetscLogObjectParent(inA,a->icol); 1298c38d4ed2SBarry Smith 1299cf242676SKris Buschelman /* 1300cf242676SKris Buschelman Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver 1301cf242676SKris Buschelman for ILU(0) factorization with natural ordering 1302cf242676SKris Buschelman */ 1303cf242676SKris Buschelman if (a->bs < 8) { 1304cf242676SKris Buschelman ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr); 1305cf242676SKris Buschelman } else { 1306c38d4ed2SBarry Smith if (!a->solve_work) { 130787828ca2SBarry Smith ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr); 130887828ca2SBarry Smith PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar)); 1309c38d4ed2SBarry Smith } 13102d61bbb3SSatish Balay } 13112d61bbb3SSatish Balay 1312667159a5SBarry Smith ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr); 1313667159a5SBarry Smith 13142d61bbb3SSatish Balay PetscFunctionReturn(0); 13152d61bbb3SSatish Balay } 13164a2ae208SSatish Balay #undef __FUNCT__ 13174a2ae208SSatish Balay #define __FUNCT__ "MatPrintHelp_SeqBAIJ" 1318ba4ca20aSSatish Balay int MatPrintHelp_SeqBAIJ(Mat A) 1319ba4ca20aSSatish Balay { 1320c38d4ed2SBarry Smith static PetscTruth called = PETSC_FALSE; 1321ba4ca20aSSatish Balay MPI_Comm comm = A->comm; 1322d132466eSBarry Smith int ierr; 1323ba4ca20aSSatish Balay 13243a40ed3dSBarry Smith PetscFunctionBegin; 1325c38d4ed2SBarry Smith if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE; 1326d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr); 1327d132466eSBarry Smith ierr = (*PetscHelpPrintf)(comm," -mat_block_size <block_size>\n");CHKERRQ(ierr); 13283a40ed3dSBarry Smith PetscFunctionReturn(0); 1329ba4ca20aSSatish Balay } 1330d9b7c43dSSatish Balay 1331fb2e594dSBarry Smith EXTERN_C_BEGIN 13324a2ae208SSatish Balay #undef __FUNCT__ 13334a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 133427a8da17SBarry Smith int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices) 133527a8da17SBarry Smith { 133627a8da17SBarry Smith Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data; 133714db4f74SSatish Balay int i,nz,nbs; 133827a8da17SBarry Smith 133927a8da17SBarry Smith PetscFunctionBegin; 134014db4f74SSatish Balay nz = baij->maxnz/baij->bs2; 134114db4f74SSatish Balay nbs = baij->nbs; 134227a8da17SBarry Smith for (i=0; i<nz; i++) { 134327a8da17SBarry Smith baij->j[i] = indices[i]; 134427a8da17SBarry Smith } 134527a8da17SBarry Smith baij->nz = nz; 134614db4f74SSatish Balay for (i=0; i<nbs; i++) { 134727a8da17SBarry Smith baij->ilen[i] = baij->imax[i]; 134827a8da17SBarry Smith } 134927a8da17SBarry Smith 135027a8da17SBarry Smith PetscFunctionReturn(0); 135127a8da17SBarry Smith } 1352fb2e594dSBarry Smith EXTERN_C_END 135327a8da17SBarry Smith 13544a2ae208SSatish Balay #undef __FUNCT__ 13554a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 135627a8da17SBarry Smith /*@ 135727a8da17SBarry Smith MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 135827a8da17SBarry Smith in the matrix. 135927a8da17SBarry Smith 136027a8da17SBarry Smith Input Parameters: 136127a8da17SBarry Smith + mat - the SeqBAIJ matrix 136227a8da17SBarry Smith - indices - the column indices 136327a8da17SBarry Smith 136415091d37SBarry Smith Level: advanced 136515091d37SBarry Smith 136627a8da17SBarry Smith Notes: 136727a8da17SBarry Smith This can be called if you have precomputed the nonzero structure of the 136827a8da17SBarry Smith matrix and want to provide it to the matrix object to improve the performance 136927a8da17SBarry Smith of the MatSetValues() operation. 137027a8da17SBarry Smith 137127a8da17SBarry Smith You MUST have set the correct numbers of nonzeros per row in the call to 137227a8da17SBarry Smith MatCreateSeqBAIJ(). 137327a8da17SBarry Smith 137427a8da17SBarry Smith MUST be called before any calls to MatSetValues(); 137527a8da17SBarry Smith 137627a8da17SBarry Smith @*/ 137727a8da17SBarry Smith int MatSeqBAIJSetColumnIndices(Mat mat,int *indices) 137827a8da17SBarry Smith { 137927a8da17SBarry Smith int ierr,(*f)(Mat,int *); 138027a8da17SBarry Smith 138127a8da17SBarry Smith PetscFunctionBegin; 138227a8da17SBarry Smith PetscValidHeaderSpecific(mat,MAT_COOKIE); 1383c134de8dSSatish Balay ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr); 138427a8da17SBarry Smith if (f) { 138527a8da17SBarry Smith ierr = (*f)(mat,indices);CHKERRQ(ierr); 138627a8da17SBarry Smith } else { 138729bbc08cSBarry Smith SETERRQ(1,"Wrong type of matrix to set column indices"); 138827a8da17SBarry Smith } 138927a8da17SBarry Smith PetscFunctionReturn(0); 139027a8da17SBarry Smith } 139127a8da17SBarry Smith 13924a2ae208SSatish Balay #undef __FUNCT__ 13934a2ae208SSatish Balay #define __FUNCT__ "MatGetRowMax_SeqBAIJ" 1394273d9f13SBarry Smith int MatGetRowMax_SeqBAIJ(Mat A,Vec v) 1395273d9f13SBarry Smith { 1396273d9f13SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1397273d9f13SBarry Smith int ierr,i,j,n,row,bs,*ai,*aj,mbs; 1398273d9f13SBarry Smith PetscReal atmp; 139987828ca2SBarry Smith PetscScalar *x,zero = 0.0; 1400273d9f13SBarry Smith MatScalar *aa; 1401273d9f13SBarry Smith int ncols,brow,krow,kcol; 1402273d9f13SBarry Smith 1403273d9f13SBarry Smith PetscFunctionBegin; 1404273d9f13SBarry Smith if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 1405273d9f13SBarry Smith bs = a->bs; 1406273d9f13SBarry Smith aa = a->a; 1407273d9f13SBarry Smith ai = a->i; 1408273d9f13SBarry Smith aj = a->j; 1409273d9f13SBarry Smith mbs = a->mbs; 1410273d9f13SBarry Smith 1411273d9f13SBarry Smith ierr = VecSet(&zero,v);CHKERRQ(ierr); 1412273d9f13SBarry Smith ierr = VecGetArray(v,&x);CHKERRQ(ierr); 1413273d9f13SBarry Smith ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 1414273d9f13SBarry Smith if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 1415273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1416273d9f13SBarry Smith ncols = ai[1] - ai[0]; ai++; 1417273d9f13SBarry Smith brow = bs*i; 1418273d9f13SBarry Smith for (j=0; j<ncols; j++){ 1419273d9f13SBarry Smith /* bcol = bs*(*aj); */ 1420273d9f13SBarry Smith for (kcol=0; kcol<bs; kcol++){ 1421273d9f13SBarry Smith for (krow=0; krow<bs; krow++){ 1422273d9f13SBarry Smith atmp = PetscAbsScalar(*aa); aa++; 1423273d9f13SBarry Smith row = brow + krow; /* row index */ 1424273d9f13SBarry Smith /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */ 1425273d9f13SBarry Smith if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp; 1426273d9f13SBarry Smith } 1427273d9f13SBarry Smith } 1428273d9f13SBarry Smith aj++; 1429273d9f13SBarry Smith } 1430273d9f13SBarry Smith } 1431273d9f13SBarry Smith ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 1432273d9f13SBarry Smith PetscFunctionReturn(0); 1433273d9f13SBarry Smith } 1434273d9f13SBarry Smith 14354a2ae208SSatish Balay #undef __FUNCT__ 14364a2ae208SSatish Balay #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ" 1437273d9f13SBarry Smith int MatSetUpPreallocation_SeqBAIJ(Mat A) 1438273d9f13SBarry Smith { 1439273d9f13SBarry Smith int ierr; 1440273d9f13SBarry Smith 1441273d9f13SBarry Smith PetscFunctionBegin; 1442273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr); 1443273d9f13SBarry Smith PetscFunctionReturn(0); 1444273d9f13SBarry Smith } 1445273d9f13SBarry Smith 14464a2ae208SSatish Balay #undef __FUNCT__ 14474a2ae208SSatish Balay #define __FUNCT__ "MatGetArray_SeqBAIJ" 144887828ca2SBarry Smith int MatGetArray_SeqBAIJ(Mat A,PetscScalar **array) 1449f2a5309cSSatish Balay { 1450f2a5309cSSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1451f2a5309cSSatish Balay PetscFunctionBegin; 1452f2a5309cSSatish Balay *array = a->a; 1453f2a5309cSSatish Balay PetscFunctionReturn(0); 1454f2a5309cSSatish Balay } 1455f2a5309cSSatish Balay 14564a2ae208SSatish Balay #undef __FUNCT__ 14574a2ae208SSatish Balay #define __FUNCT__ "MatRestoreArray_SeqBAIJ" 145887828ca2SBarry Smith int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar **array) 1459f2a5309cSSatish Balay { 1460f2a5309cSSatish Balay PetscFunctionBegin; 1461f2a5309cSSatish Balay PetscFunctionReturn(0); 1462f2a5309cSSatish Balay } 1463f2a5309cSSatish Balay 14642593348eSBarry Smith /* -------------------------------------------------------------------*/ 1465cc2dc46cSBarry Smith static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 1466cc2dc46cSBarry Smith MatGetRow_SeqBAIJ, 1467cc2dc46cSBarry Smith MatRestoreRow_SeqBAIJ, 1468cc2dc46cSBarry Smith MatMult_SeqBAIJ_N, 1469cc2dc46cSBarry Smith MatMultAdd_SeqBAIJ_N, 14707c922b88SBarry Smith MatMultTranspose_SeqBAIJ, 14717c922b88SBarry Smith MatMultTransposeAdd_SeqBAIJ, 1472cc2dc46cSBarry Smith MatSolve_SeqBAIJ_N, 1473cc2dc46cSBarry Smith 0, 1474cc2dc46cSBarry Smith 0, 1475cc2dc46cSBarry Smith 0, 1476cc2dc46cSBarry Smith MatLUFactor_SeqBAIJ, 1477cc2dc46cSBarry Smith 0, 1478b6490206SBarry Smith 0, 1479f2501298SSatish Balay MatTranspose_SeqBAIJ, 1480cc2dc46cSBarry Smith MatGetInfo_SeqBAIJ, 1481cc2dc46cSBarry Smith MatEqual_SeqBAIJ, 1482cc2dc46cSBarry Smith MatGetDiagonal_SeqBAIJ, 1483cc2dc46cSBarry Smith MatDiagonalScale_SeqBAIJ, 1484cc2dc46cSBarry Smith MatNorm_SeqBAIJ, 1485b6490206SBarry Smith 0, 1486cc2dc46cSBarry Smith MatAssemblyEnd_SeqBAIJ, 1487cc2dc46cSBarry Smith 0, 1488cc2dc46cSBarry Smith MatSetOption_SeqBAIJ, 1489cc2dc46cSBarry Smith MatZeroEntries_SeqBAIJ, 1490cc2dc46cSBarry Smith MatZeroRows_SeqBAIJ, 1491cc2dc46cSBarry Smith MatLUFactorSymbolic_SeqBAIJ, 1492cc2dc46cSBarry Smith MatLUFactorNumeric_SeqBAIJ_N, 1493cc2dc46cSBarry Smith 0, 1494cc2dc46cSBarry Smith 0, 1495273d9f13SBarry Smith MatSetUpPreallocation_SeqBAIJ, 1496cc2dc46cSBarry Smith MatILUFactorSymbolic_SeqBAIJ, 1497cc2dc46cSBarry Smith 0, 1498f2a5309cSSatish Balay MatGetArray_SeqBAIJ, 1499f2a5309cSSatish Balay MatRestoreArray_SeqBAIJ, 15002e8a6d31SBarry Smith MatDuplicate_SeqBAIJ, 1501cc2dc46cSBarry Smith 0, 1502cc2dc46cSBarry Smith 0, 1503cc2dc46cSBarry Smith MatILUFactor_SeqBAIJ, 1504cc2dc46cSBarry Smith 0, 1505cc2dc46cSBarry Smith 0, 1506cc2dc46cSBarry Smith MatGetSubMatrices_SeqBAIJ, 1507cc2dc46cSBarry Smith MatIncreaseOverlap_SeqBAIJ, 1508cc2dc46cSBarry Smith MatGetValues_SeqBAIJ, 1509cc2dc46cSBarry Smith 0, 1510cc2dc46cSBarry Smith MatPrintHelp_SeqBAIJ, 1511cc2dc46cSBarry Smith MatScale_SeqBAIJ, 1512cc2dc46cSBarry Smith 0, 1513cc2dc46cSBarry Smith 0, 1514cc2dc46cSBarry Smith 0, 1515cc2dc46cSBarry Smith MatGetBlockSize_SeqBAIJ, 15163b2fbd54SBarry Smith MatGetRowIJ_SeqBAIJ, 151792c4ed94SBarry Smith MatRestoreRowIJ_SeqBAIJ, 1518cc2dc46cSBarry Smith 0, 1519cc2dc46cSBarry Smith 0, 1520cc2dc46cSBarry Smith 0, 1521cc2dc46cSBarry Smith 0, 1522cc2dc46cSBarry Smith 0, 1523cc2dc46cSBarry Smith 0, 1524d3825aa8SBarry Smith MatSetValuesBlocked_SeqBAIJ, 1525cc2dc46cSBarry Smith MatGetSubMatrix_SeqBAIJ, 1526b9b97703SBarry Smith MatDestroy_SeqBAIJ, 1527b9b97703SBarry Smith MatView_SeqBAIJ, 15283a64cc32SBarry Smith MatGetPetscMaps_Petsc, 1529273d9f13SBarry Smith 0, 1530273d9f13SBarry Smith 0, 1531273d9f13SBarry Smith 0, 1532273d9f13SBarry Smith 0, 1533273d9f13SBarry Smith 0, 1534273d9f13SBarry Smith 0, 1535273d9f13SBarry Smith MatGetRowMax_SeqBAIJ, 1536273d9f13SBarry Smith MatConvert_Basic}; 15372593348eSBarry Smith 15383e90b805SBarry Smith EXTERN_C_BEGIN 15394a2ae208SSatish Balay #undef __FUNCT__ 15404a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_SeqBAIJ" 15413e90b805SBarry Smith int MatStoreValues_SeqBAIJ(Mat mat) 15423e90b805SBarry Smith { 15433e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1544273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2; 1545d132466eSBarry Smith int ierr; 15463e90b805SBarry Smith 15473e90b805SBarry Smith PetscFunctionBegin; 15483e90b805SBarry Smith if (aij->nonew != 1) { 154929bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15503e90b805SBarry Smith } 15513e90b805SBarry Smith 15523e90b805SBarry Smith /* allocate space for values if not already there */ 15533e90b805SBarry Smith if (!aij->saved_values) { 155487828ca2SBarry Smith ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr); 15553e90b805SBarry Smith } 15563e90b805SBarry Smith 15573e90b805SBarry Smith /* copy values over */ 155887828ca2SBarry Smith ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15593e90b805SBarry Smith PetscFunctionReturn(0); 15603e90b805SBarry Smith } 15613e90b805SBarry Smith EXTERN_C_END 15623e90b805SBarry Smith 15633e90b805SBarry Smith EXTERN_C_BEGIN 15644a2ae208SSatish Balay #undef __FUNCT__ 15654a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 156632e87ba3SBarry Smith int MatRetrieveValues_SeqBAIJ(Mat mat) 15673e90b805SBarry Smith { 15683e90b805SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data; 1569273d9f13SBarry Smith int nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr; 15703e90b805SBarry Smith 15713e90b805SBarry Smith PetscFunctionBegin; 15723e90b805SBarry Smith if (aij->nonew != 1) { 157329bbc08cSBarry Smith SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first"); 15743e90b805SBarry Smith } 15753e90b805SBarry Smith if (!aij->saved_values) { 157629bbc08cSBarry Smith SETERRQ(1,"Must call MatStoreValues(A);first"); 15773e90b805SBarry Smith } 15783e90b805SBarry Smith 15793e90b805SBarry Smith /* copy values over */ 158087828ca2SBarry Smith ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 15813e90b805SBarry Smith PetscFunctionReturn(0); 15823e90b805SBarry Smith } 15833e90b805SBarry Smith EXTERN_C_END 15843e90b805SBarry Smith 1585273d9f13SBarry Smith EXTERN_C_BEGIN 1586273d9f13SBarry Smith extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,Mat*); 1587273d9f13SBarry Smith EXTERN_C_END 1588273d9f13SBarry Smith 1589273d9f13SBarry Smith EXTERN_C_BEGIN 15904a2ae208SSatish Balay #undef __FUNCT__ 15914a2ae208SSatish Balay #define __FUNCT__ "MatCreate_SeqBAIJ" 1592273d9f13SBarry Smith int MatCreate_SeqBAIJ(Mat B) 15932593348eSBarry Smith { 1594273d9f13SBarry Smith int ierr,size; 1595b6490206SBarry Smith Mat_SeqBAIJ *b; 15963b2fbd54SBarry Smith 15973a40ed3dSBarry Smith PetscFunctionBegin; 1598273d9f13SBarry Smith ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr); 159929bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 1600b6490206SBarry Smith 1601273d9f13SBarry Smith B->m = B->M = PetscMax(B->m,B->M); 1602273d9f13SBarry Smith B->n = B->N = PetscMax(B->n,B->N); 1603b0a32e0cSBarry Smith ierr = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr); 1604b0a32e0cSBarry Smith B->data = (void*)b; 1605549d3d68SSatish Balay ierr = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr); 1606549d3d68SSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 16072593348eSBarry Smith B->factor = 0; 16082593348eSBarry Smith B->lupivotthreshold = 1.0; 160990f02eecSBarry Smith B->mapping = 0; 16102593348eSBarry Smith b->row = 0; 16112593348eSBarry Smith b->col = 0; 1612e51c0b9cSSatish Balay b->icol = 0; 16132593348eSBarry Smith b->reallocs = 0; 16143e90b805SBarry Smith b->saved_values = 0; 1615cfce73b9SSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1616563b5814SBarry Smith b->setvalueslen = 0; 1617563b5814SBarry Smith b->setvaluescopy = PETSC_NULL; 1618563b5814SBarry Smith #endif 16198661488fSKris Buschelman b->single_precision_solves = PETSC_FALSE; 16202593348eSBarry Smith 16213a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr); 16223a64cc32SBarry Smith ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr); 1623a5ae1ecdSBarry Smith 1624c4992f7dSBarry Smith b->sorted = PETSC_FALSE; 1625c4992f7dSBarry Smith b->roworiented = PETSC_TRUE; 16262593348eSBarry Smith b->nonew = 0; 16272593348eSBarry Smith b->diag = 0; 16282593348eSBarry Smith b->solve_work = 0; 1629de6a44a3SBarry Smith b->mult_work = 0; 16302a1b7f2aSHong Zhang B->spptr = 0; 16310e6d2581SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 1632883fce79SBarry Smith b->keepzeroedrows = PETSC_FALSE; 16334e220ebcSLois Curfman McInnes 1634f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 16353e90b805SBarry Smith "MatStoreValues_SeqBAIJ", 1636bc4b532fSSatish Balay MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 1637f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 16383e90b805SBarry Smith "MatRetrieveValues_SeqBAIJ", 1639bc4b532fSSatish Balay MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 1640f1af5d2fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C", 164127a8da17SBarry Smith "MatSeqBAIJSetColumnIndices_SeqBAIJ", 1642bc4b532fSSatish Balay MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 1643273d9f13SBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_SeqBAIJ_SeqAIJ_C", 1644273d9f13SBarry Smith "MatConvert_SeqBAIJ_SeqAIJ", 1645273d9f13SBarry Smith MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 16463a40ed3dSBarry Smith PetscFunctionReturn(0); 16472593348eSBarry Smith } 1648273d9f13SBarry Smith EXTERN_C_END 16492593348eSBarry Smith 16504a2ae208SSatish Balay #undef __FUNCT__ 16514a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_SeqBAIJ" 16522e8a6d31SBarry Smith int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 16532593348eSBarry Smith { 16542593348eSBarry Smith Mat C; 1655b6490206SBarry Smith Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data; 165627a8da17SBarry Smith int i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr; 1657de6a44a3SBarry Smith 16583a40ed3dSBarry Smith PetscFunctionBegin; 165929bbc08cSBarry Smith if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix"); 16602593348eSBarry Smith 16612593348eSBarry Smith *B = 0; 1662273d9f13SBarry Smith ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr); 1663273d9f13SBarry Smith ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr); 1664273d9f13SBarry Smith c = (Mat_SeqBAIJ*)C->data; 166544cd7ae7SLois Curfman McInnes 1666b6490206SBarry Smith c->bs = a->bs; 16679df24120SSatish Balay c->bs2 = a->bs2; 1668b6490206SBarry Smith c->mbs = a->mbs; 1669b6490206SBarry Smith c->nbs = a->nbs; 167035d8aa7fSBarry Smith ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 16712593348eSBarry Smith 1672b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr); 1673b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr); 1674b6490206SBarry Smith for (i=0; i<mbs; i++) { 16752593348eSBarry Smith c->imax[i] = a->imax[i]; 16762593348eSBarry Smith c->ilen[i] = a->ilen[i]; 16772593348eSBarry Smith } 16782593348eSBarry Smith 16792593348eSBarry Smith /* allocate the matrix space */ 1680c4992f7dSBarry Smith c->singlemalloc = PETSC_TRUE; 16813f1db9ecSBarry Smith len = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int)); 1682b0a32e0cSBarry Smith ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr); 16837e67e3f9SSatish Balay c->j = (int*)(c->a + nz*bs2); 1684de6a44a3SBarry Smith c->i = c->j + nz; 1685549d3d68SSatish Balay ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr); 1686b6490206SBarry Smith if (mbs > 0) { 1687549d3d68SSatish Balay ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr); 16882e8a6d31SBarry Smith if (cpvalues == MAT_COPY_VALUES) { 1689549d3d68SSatish Balay ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16902e8a6d31SBarry Smith } else { 1691549d3d68SSatish Balay ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 16922593348eSBarry Smith } 16932593348eSBarry Smith } 16942593348eSBarry Smith 1695b0a32e0cSBarry Smith PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 16962593348eSBarry Smith c->sorted = a->sorted; 16972593348eSBarry Smith c->roworiented = a->roworiented; 16982593348eSBarry Smith c->nonew = a->nonew; 16992593348eSBarry Smith 17002593348eSBarry Smith if (a->diag) { 1701b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr); 1702b0a32e0cSBarry Smith PetscLogObjectMemory(C,(mbs+1)*sizeof(int)); 1703b6490206SBarry Smith for (i=0; i<mbs; i++) { 17042593348eSBarry Smith c->diag[i] = a->diag[i]; 17052593348eSBarry Smith } 170698305bb5SBarry Smith } else c->diag = 0; 17072593348eSBarry Smith c->nz = a->nz; 17082593348eSBarry Smith c->maxnz = a->maxnz; 17092593348eSBarry Smith c->solve_work = 0; 17102a1b7f2aSHong Zhang C->spptr = 0; /* Dangerous -I'm throwing away a->spptr */ 17117fc0212eSBarry Smith c->mult_work = 0; 1712273d9f13SBarry Smith C->preallocated = PETSC_TRUE; 1713273d9f13SBarry Smith C->assembled = PETSC_TRUE; 17142593348eSBarry Smith *B = C; 1715b0a32e0cSBarry Smith ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr); 17163a40ed3dSBarry Smith PetscFunctionReturn(0); 17172593348eSBarry Smith } 17182593348eSBarry Smith 1719273d9f13SBarry Smith EXTERN_C_BEGIN 17204a2ae208SSatish Balay #undef __FUNCT__ 17214a2ae208SSatish Balay #define __FUNCT__ "MatLoad_SeqBAIJ" 1722b0a32e0cSBarry Smith int MatLoad_SeqBAIJ(PetscViewer viewer,MatType type,Mat *A) 17232593348eSBarry Smith { 1724b6490206SBarry Smith Mat_SeqBAIJ *a; 17252593348eSBarry Smith Mat B; 1726f1af5d2fSBarry Smith int i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1; 1727b6490206SBarry Smith int *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 172835aab85fSBarry Smith int kmax,jcount,block,idx,point,nzcountb,extra_rows; 1729a7c10996SSatish Balay int *masked,nmask,tmp,bs2,ishift; 173087828ca2SBarry Smith PetscScalar *aa; 173119bcc07fSBarry Smith MPI_Comm comm = ((PetscObject)viewer)->comm; 17322593348eSBarry Smith 17333a40ed3dSBarry Smith PetscFunctionBegin; 1734b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 1735de6a44a3SBarry Smith bs2 = bs*bs; 1736b6490206SBarry Smith 1737d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 173829bbc08cSBarry Smith if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor"); 1739b0a32e0cSBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 17400752156aSBarry Smith ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 1741552e946dSBarry Smith if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 17422593348eSBarry Smith M = header[1]; N = header[2]; nz = header[3]; 17432593348eSBarry Smith 1744d64ed03dSBarry Smith if (header[3] < 0) { 174529bbc08cSBarry Smith SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 1746d64ed03dSBarry Smith } 1747d64ed03dSBarry Smith 174829bbc08cSBarry Smith if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices"); 174935aab85fSBarry Smith 175035aab85fSBarry Smith /* 175135aab85fSBarry Smith This code adds extra rows to make sure the number of rows is 175235aab85fSBarry Smith divisible by the blocksize 175335aab85fSBarry Smith */ 1754b6490206SBarry Smith mbs = M/bs; 175535aab85fSBarry Smith extra_rows = bs - M + bs*(mbs); 175635aab85fSBarry Smith if (extra_rows == bs) extra_rows = 0; 175735aab85fSBarry Smith else mbs++; 175835aab85fSBarry Smith if (extra_rows) { 1759b0a32e0cSBarry Smith PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"); 176035aab85fSBarry Smith } 1761b6490206SBarry Smith 17622593348eSBarry Smith /* read in row lengths */ 1763b0a32e0cSBarry Smith ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr); 17640752156aSBarry Smith ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 176535aab85fSBarry Smith for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 17662593348eSBarry Smith 1767b6490206SBarry Smith /* read in column indices */ 1768b0a32e0cSBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr); 17690752156aSBarry Smith ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 177035aab85fSBarry Smith for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 1771b6490206SBarry Smith 1772b6490206SBarry Smith /* loop over row lengths determining block row lengths */ 1773b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr); 1774549d3d68SSatish Balay ierr = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr); 1775b0a32e0cSBarry Smith ierr = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr); 1776549d3d68SSatish Balay ierr = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr); 177735aab85fSBarry Smith masked = mask + mbs; 1778b6490206SBarry Smith rowcount = 0; nzcount = 0; 1779b6490206SBarry Smith for (i=0; i<mbs; i++) { 178035aab85fSBarry Smith nmask = 0; 1781b6490206SBarry Smith for (j=0; j<bs; j++) { 1782b6490206SBarry Smith kmax = rowlengths[rowcount]; 1783b6490206SBarry Smith for (k=0; k<kmax; k++) { 178435aab85fSBarry Smith tmp = jj[nzcount++]/bs; 178535aab85fSBarry Smith if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 1786b6490206SBarry Smith } 1787b6490206SBarry Smith rowcount++; 1788b6490206SBarry Smith } 178935aab85fSBarry Smith browlengths[i] += nmask; 179035aab85fSBarry Smith /* zero out the mask elements we set */ 179135aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1792b6490206SBarry Smith } 1793b6490206SBarry Smith 17942593348eSBarry Smith /* create our matrix */ 17953eee16b0SBarry Smith ierr = MatCreateSeqBAIJ(comm,bs,M+extra_rows,N+extra_rows,0,browlengths,A);CHKERRQ(ierr); 17962593348eSBarry Smith B = *A; 1797b6490206SBarry Smith a = (Mat_SeqBAIJ*)B->data; 17982593348eSBarry Smith 17992593348eSBarry Smith /* set matrix "i" values */ 1800de6a44a3SBarry Smith a->i[0] = 0; 1801b6490206SBarry Smith for (i=1; i<= mbs; i++) { 1802b6490206SBarry Smith a->i[i] = a->i[i-1] + browlengths[i-1]; 1803b6490206SBarry Smith a->ilen[i-1] = browlengths[i-1]; 18042593348eSBarry Smith } 1805b6490206SBarry Smith a->nz = 0; 1806b6490206SBarry Smith for (i=0; i<mbs; i++) a->nz += browlengths[i]; 18072593348eSBarry Smith 1808b6490206SBarry Smith /* read in nonzero values */ 180987828ca2SBarry Smith ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr); 18100752156aSBarry Smith ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 181135aab85fSBarry Smith for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 1812b6490206SBarry Smith 1813b6490206SBarry Smith /* set "a" and "j" values into matrix */ 1814b6490206SBarry Smith nzcount = 0; jcount = 0; 1815b6490206SBarry Smith for (i=0; i<mbs; i++) { 1816b6490206SBarry Smith nzcountb = nzcount; 181735aab85fSBarry Smith nmask = 0; 1818b6490206SBarry Smith for (j=0; j<bs; j++) { 1819b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1820b6490206SBarry Smith for (k=0; k<kmax; k++) { 182135aab85fSBarry Smith tmp = jj[nzcount++]/bs; 182235aab85fSBarry Smith if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 1823b6490206SBarry Smith } 1824b6490206SBarry Smith } 1825de6a44a3SBarry Smith /* sort the masked values */ 1826433994e6SBarry Smith ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 1827de6a44a3SBarry Smith 1828b6490206SBarry Smith /* set "j" values into matrix */ 1829b6490206SBarry Smith maskcount = 1; 183035aab85fSBarry Smith for (j=0; j<nmask; j++) { 183135aab85fSBarry Smith a->j[jcount++] = masked[j]; 1832de6a44a3SBarry Smith mask[masked[j]] = maskcount++; 1833b6490206SBarry Smith } 1834b6490206SBarry Smith /* set "a" values into matrix */ 1835de6a44a3SBarry Smith ishift = bs2*a->i[i]; 1836b6490206SBarry Smith for (j=0; j<bs; j++) { 1837b6490206SBarry Smith kmax = rowlengths[i*bs+j]; 1838b6490206SBarry Smith for (k=0; k<kmax; k++) { 1839de6a44a3SBarry Smith tmp = jj[nzcountb]/bs ; 1840de6a44a3SBarry Smith block = mask[tmp] - 1; 1841de6a44a3SBarry Smith point = jj[nzcountb] - bs*tmp; 1842de6a44a3SBarry Smith idx = ishift + bs2*block + j + bs*point; 1843375fe846SBarry Smith a->a[idx] = (MatScalar)aa[nzcountb++]; 1844b6490206SBarry Smith } 1845b6490206SBarry Smith } 184635aab85fSBarry Smith /* zero out the mask elements we set */ 184735aab85fSBarry Smith for (j=0; j<nmask; j++) mask[masked[j]] = 0; 1848b6490206SBarry Smith } 184929bbc08cSBarry Smith if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 1850b6490206SBarry Smith 1851606d414cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 1852606d414cSSatish Balay ierr = PetscFree(browlengths);CHKERRQ(ierr); 1853606d414cSSatish Balay ierr = PetscFree(aa);CHKERRQ(ierr); 1854606d414cSSatish Balay ierr = PetscFree(jj);CHKERRQ(ierr); 1855606d414cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 1856b6490206SBarry Smith 1857b6490206SBarry Smith B->assembled = PETSC_TRUE; 1858de6a44a3SBarry Smith 18599c01be13SBarry Smith ierr = MatView_Private(B);CHKERRQ(ierr); 18603a40ed3dSBarry Smith PetscFunctionReturn(0); 18612593348eSBarry Smith } 1862273d9f13SBarry Smith EXTERN_C_END 18632593348eSBarry Smith 18644a2ae208SSatish Balay #undef __FUNCT__ 18654a2ae208SSatish Balay #define __FUNCT__ "MatCreateSeqBAIJ" 1866273d9f13SBarry Smith /*@C 1867273d9f13SBarry Smith MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 1868273d9f13SBarry Smith compressed row) format. For good matrix assembly performance the 1869273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1870273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1871273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 18722593348eSBarry Smith 1873273d9f13SBarry Smith Collective on MPI_Comm 1874273d9f13SBarry Smith 1875273d9f13SBarry Smith Input Parameters: 1876273d9f13SBarry Smith + comm - MPI communicator, set to PETSC_COMM_SELF 1877273d9f13SBarry Smith . bs - size of block 1878273d9f13SBarry Smith . m - number of rows 1879273d9f13SBarry Smith . n - number of columns 188035d8aa7fSBarry Smith . nz - number of nonzero blocks per block row (same for all rows) 188135d8aa7fSBarry Smith - nnz - array containing the number of nonzero blocks in the various block rows 1882273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1883273d9f13SBarry Smith 1884273d9f13SBarry Smith Output Parameter: 1885273d9f13SBarry Smith . A - the matrix 1886273d9f13SBarry Smith 1887273d9f13SBarry Smith Options Database Keys: 1888273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1889273d9f13SBarry Smith block calculations (much slower) 1890273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1891273d9f13SBarry Smith 1892273d9f13SBarry Smith Level: intermediate 1893273d9f13SBarry Smith 1894273d9f13SBarry Smith Notes: 189535d8aa7fSBarry Smith A nonzero block is any block that as 1 or more nonzeros in it 189635d8aa7fSBarry Smith 1897273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1898273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1899273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1900273d9f13SBarry Smith 1901273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1902273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1903273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1904273d9f13SBarry Smith matrices. 1905273d9f13SBarry Smith 1906273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1907273d9f13SBarry Smith @*/ 1908273d9f13SBarry Smith int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,int *nnz,Mat *A) 1909273d9f13SBarry Smith { 1910273d9f13SBarry Smith int ierr; 1911273d9f13SBarry Smith 1912273d9f13SBarry Smith PetscFunctionBegin; 1913273d9f13SBarry Smith ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr); 1914273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 1915273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr); 1916273d9f13SBarry Smith PetscFunctionReturn(0); 1917273d9f13SBarry Smith } 1918273d9f13SBarry Smith 19194a2ae208SSatish Balay #undef __FUNCT__ 19204a2ae208SSatish Balay #define __FUNCT__ "MatSeqBAIJSetPreallocation" 1921273d9f13SBarry Smith /*@C 1922273d9f13SBarry Smith MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 1923273d9f13SBarry Smith per row in the matrix. For good matrix assembly performance the 1924273d9f13SBarry Smith user should preallocate the matrix storage by setting the parameter nz 1925273d9f13SBarry Smith (or the array nnz). By setting these parameters accurately, performance 1926273d9f13SBarry Smith during matrix assembly can be increased by more than a factor of 50. 1927273d9f13SBarry Smith 1928273d9f13SBarry Smith Collective on MPI_Comm 1929273d9f13SBarry Smith 1930273d9f13SBarry Smith Input Parameters: 1931273d9f13SBarry Smith + A - the matrix 1932273d9f13SBarry Smith . bs - size of block 1933273d9f13SBarry Smith . nz - number of block nonzeros per block row (same for all rows) 1934273d9f13SBarry Smith - nnz - array containing the number of block nonzeros in the various block rows 1935273d9f13SBarry Smith (possibly different for each block row) or PETSC_NULL 1936273d9f13SBarry Smith 1937273d9f13SBarry Smith Options Database Keys: 1938273d9f13SBarry Smith . -mat_no_unroll - uses code that does not unroll the loops in the 1939273d9f13SBarry Smith block calculations (much slower) 1940273d9f13SBarry Smith . -mat_block_size - size of the blocks to use 1941273d9f13SBarry Smith 1942273d9f13SBarry Smith Level: intermediate 1943273d9f13SBarry Smith 1944273d9f13SBarry Smith Notes: 1945273d9f13SBarry Smith The block AIJ format is fully compatible with standard Fortran 77 1946273d9f13SBarry Smith storage. That is, the stored row and column indices can begin at 1947273d9f13SBarry Smith either one (as in Fortran) or zero. See the users' manual for details. 1948273d9f13SBarry Smith 1949273d9f13SBarry Smith Specify the preallocated storage with either nz or nnz (not both). 1950273d9f13SBarry Smith Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory 1951273d9f13SBarry Smith allocation. For additional details, see the users manual chapter on 1952273d9f13SBarry Smith matrices. 1953273d9f13SBarry Smith 1954273d9f13SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ() 1955273d9f13SBarry Smith @*/ 1956273d9f13SBarry Smith int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,int *nnz) 1957273d9f13SBarry Smith { 1958273d9f13SBarry Smith Mat_SeqBAIJ *b; 1959273d9f13SBarry Smith int i,len,ierr,mbs,nbs,bs2,newbs = bs; 1960273d9f13SBarry Smith PetscTruth flg; 1961273d9f13SBarry Smith 1962273d9f13SBarry Smith PetscFunctionBegin; 1963273d9f13SBarry Smith ierr = PetscTypeCompare((PetscObject)B,MATSEQBAIJ,&flg);CHKERRQ(ierr); 1964273d9f13SBarry Smith if (!flg) PetscFunctionReturn(0); 1965273d9f13SBarry Smith 1966273d9f13SBarry Smith B->preallocated = PETSC_TRUE; 1967b0a32e0cSBarry Smith ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr); 1968273d9f13SBarry Smith if (nnz && newbs != bs) { 1969273d9f13SBarry Smith SETERRQ(1,"Cannot change blocksize from command line if setting nnz"); 1970273d9f13SBarry Smith } 1971273d9f13SBarry Smith bs = newbs; 1972273d9f13SBarry Smith 1973273d9f13SBarry Smith mbs = B->m/bs; 1974273d9f13SBarry Smith nbs = B->n/bs; 1975273d9f13SBarry Smith bs2 = bs*bs; 1976273d9f13SBarry Smith 1977273d9f13SBarry Smith if (mbs*bs!=B->m || nbs*bs!=B->n) { 197835d8aa7fSBarry Smith SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs); 1979273d9f13SBarry Smith } 1980273d9f13SBarry Smith 1981435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 1982435da068SBarry Smith if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz); 1983273d9f13SBarry Smith if (nnz) { 1984273d9f13SBarry Smith for (i=0; i<mbs; i++) { 1985273d9f13SBarry Smith if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]); 19863a7fca6bSBarry Smith if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs); 1987273d9f13SBarry Smith } 1988273d9f13SBarry Smith } 1989273d9f13SBarry Smith 1990273d9f13SBarry Smith b = (Mat_SeqBAIJ*)B->data; 1991b0a32e0cSBarry Smith ierr = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr); 199254138f6bSKris Buschelman B->ops->solve = MatSolve_SeqBAIJ_Update; 199354138f6bSKris Buschelman B->ops->solvetranspose = MatSolveTranspose_SeqBAIJ_Update; 1994273d9f13SBarry Smith if (!flg) { 1995273d9f13SBarry Smith switch (bs) { 1996273d9f13SBarry Smith case 1: 1997273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 1998273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_1; 1999273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2000273d9f13SBarry Smith break; 2001273d9f13SBarry Smith case 2: 2002273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 2003273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_2; 2004273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2005273d9f13SBarry Smith break; 2006273d9f13SBarry Smith case 3: 2007273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 2008273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_3; 2009273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2010273d9f13SBarry Smith break; 2011273d9f13SBarry Smith case 4: 2012273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 2013273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_4; 2014273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2015273d9f13SBarry Smith break; 2016273d9f13SBarry Smith case 5: 2017273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 2018273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_5; 2019273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2020273d9f13SBarry Smith break; 2021273d9f13SBarry Smith case 6: 2022273d9f13SBarry Smith B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 2023273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_6; 2024273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2025273d9f13SBarry Smith break; 2026273d9f13SBarry Smith case 7: 202754138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 2028273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_7; 2029273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2030273d9f13SBarry Smith break; 2031273d9f13SBarry Smith default: 203254138f6bSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 2033273d9f13SBarry Smith B->ops->mult = MatMult_SeqBAIJ_N; 2034273d9f13SBarry Smith B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2035273d9f13SBarry Smith break; 2036273d9f13SBarry Smith } 2037273d9f13SBarry Smith } 2038273d9f13SBarry Smith b->bs = bs; 2039273d9f13SBarry Smith b->mbs = mbs; 2040273d9f13SBarry Smith b->nbs = nbs; 2041b0a32e0cSBarry Smith ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr); 2042273d9f13SBarry Smith if (!nnz) { 2043435da068SBarry Smith if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2044273d9f13SBarry Smith else if (nz <= 0) nz = 1; 2045273d9f13SBarry Smith for (i=0; i<mbs; i++) b->imax[i] = nz; 2046273d9f13SBarry Smith nz = nz*mbs; 2047273d9f13SBarry Smith } else { 2048273d9f13SBarry Smith nz = 0; 2049273d9f13SBarry Smith for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2050273d9f13SBarry Smith } 2051273d9f13SBarry Smith 2052273d9f13SBarry Smith /* allocate the matrix space */ 2053273d9f13SBarry Smith len = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int); 2054b0a32e0cSBarry Smith ierr = PetscMalloc(len,&b->a);CHKERRQ(ierr); 2055273d9f13SBarry Smith ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2056273d9f13SBarry Smith b->j = (int*)(b->a + nz*bs2); 2057273d9f13SBarry Smith ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr); 2058273d9f13SBarry Smith b->i = b->j + nz; 2059273d9f13SBarry Smith b->singlemalloc = PETSC_TRUE; 2060273d9f13SBarry Smith 2061273d9f13SBarry Smith b->i[0] = 0; 2062273d9f13SBarry Smith for (i=1; i<mbs+1; i++) { 2063273d9f13SBarry Smith b->i[i] = b->i[i-1] + b->imax[i-1]; 2064273d9f13SBarry Smith } 2065273d9f13SBarry Smith 2066273d9f13SBarry Smith /* b->ilen will count nonzeros in each block row so far. */ 206716d6aa21SSatish Balay ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr); 2068b0a32e0cSBarry Smith PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ)); 2069273d9f13SBarry Smith for (i=0; i<mbs; i++) { b->ilen[i] = 0;} 2070273d9f13SBarry Smith 2071273d9f13SBarry Smith b->bs = bs; 2072273d9f13SBarry Smith b->bs2 = bs2; 2073273d9f13SBarry Smith b->mbs = mbs; 2074273d9f13SBarry Smith b->nz = 0; 2075273d9f13SBarry Smith b->maxnz = nz*bs2; 2076273d9f13SBarry Smith B->info.nz_unneeded = (PetscReal)b->maxnz; 2077273d9f13SBarry Smith PetscFunctionReturn(0); 2078273d9f13SBarry Smith } 20792593348eSBarry Smith 2080