1*a30f8f8cSSatish Balay /*$Id: mpisbaij.c,v 1.195 2000/04/20 03:27:54 bsmith Exp $*/ 2*a30f8f8cSSatish Balay 3*a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h" 4*a30f8f8cSSatish Balay #include "src/vec/vecimpl.h" 5*a30f8f8cSSatish Balay #include "mpisbaij.h" 6*a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h" 7*a30f8f8cSSatish Balay 8*a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat); 9*a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat); 10*a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int); 11*a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **); 12*a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *); 13*a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode); 14*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode); 15*a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 16*a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**); 17*a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat); 18*a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*); 19*a30f8f8cSSatish Balay 20*a30f8f8cSSatish Balay /* UGLY, ugly, ugly 21*a30f8f8cSSatish Balay When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does 22*a30f8f8cSSatish Balay not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and 23*a30f8f8cSSatish Balay inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ() 24*a30f8f8cSSatish Balay converts the entries into single precision and then calls ..._MatScalar() to put them 25*a30f8f8cSSatish Balay into the single precision data structures. 26*a30f8f8cSSatish Balay */ 27*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 28*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 29*a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 30*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 31*a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 32*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode); 33*a30f8f8cSSatish Balay #else 34*a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar MatSetValuesBlocked_SeqSBAIJ 35*a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar MatSetValues_MPISBAIJ 36*a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar MatSetValuesBlocked_MPISBAIJ 37*a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar MatSetValues_MPISBAIJ_HT 38*a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar MatSetValuesBlocked_MPISBAIJ_HT 39*a30f8f8cSSatish Balay #endif 40*a30f8f8cSSatish Balay 41*a30f8f8cSSatish Balay EXTERN_C_BEGIN 42*a30f8f8cSSatish Balay #undef __FUNC__ 43*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ" 44*a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat) 45*a30f8f8cSSatish Balay { 46*a30f8f8cSSatish Balay Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 47*a30f8f8cSSatish Balay int ierr; 48*a30f8f8cSSatish Balay 49*a30f8f8cSSatish Balay PetscFunctionBegin; 50*a30f8f8cSSatish Balay ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 51*a30f8f8cSSatish Balay ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 52*a30f8f8cSSatish Balay PetscFunctionReturn(0); 53*a30f8f8cSSatish Balay } 54*a30f8f8cSSatish Balay EXTERN_C_END 55*a30f8f8cSSatish Balay 56*a30f8f8cSSatish Balay EXTERN_C_BEGIN 57*a30f8f8cSSatish Balay #undef __FUNC__ 58*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ" 59*a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat) 60*a30f8f8cSSatish Balay { 61*a30f8f8cSSatish Balay Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 62*a30f8f8cSSatish Balay int ierr; 63*a30f8f8cSSatish Balay 64*a30f8f8cSSatish Balay PetscFunctionBegin; 65*a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 66*a30f8f8cSSatish Balay ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 67*a30f8f8cSSatish Balay PetscFunctionReturn(0); 68*a30f8f8cSSatish Balay } 69*a30f8f8cSSatish Balay EXTERN_C_END 70*a30f8f8cSSatish Balay 71*a30f8f8cSSatish Balay /* 72*a30f8f8cSSatish Balay Local utility routine that creates a mapping from the global column 73*a30f8f8cSSatish Balay number to the local number in the off-diagonal part of the local 74*a30f8f8cSSatish Balay storage of the matrix. This is done in a non scable way since the 75*a30f8f8cSSatish Balay length of colmap equals the global matrix length. 76*a30f8f8cSSatish Balay */ 77*a30f8f8cSSatish Balay #undef __FUNC__ 78*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private" 79*a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat) 80*a30f8f8cSSatish Balay { 81*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 82*a30f8f8cSSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 83*a30f8f8cSSatish Balay int nbs = B->nbs,i,bs=B->bs,ierr; 84*a30f8f8cSSatish Balay 85*a30f8f8cSSatish Balay PetscFunctionBegin; 86*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 87*a30f8f8cSSatish Balay ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr); 88*a30f8f8cSSatish Balay for (i=0; i<nbs; i++){ 89*a30f8f8cSSatish Balay ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr); 90*a30f8f8cSSatish Balay } 91*a30f8f8cSSatish Balay #else 92*a30f8f8cSSatish Balay baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap); 93*a30f8f8cSSatish Balay PLogObjectMemory(mat,baij->Nbs*sizeof(int)); 94*a30f8f8cSSatish Balay ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr); 95*a30f8f8cSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 96*a30f8f8cSSatish Balay #endif 97*a30f8f8cSSatish Balay PetscFunctionReturn(0); 98*a30f8f8cSSatish Balay } 99*a30f8f8cSSatish Balay 100*a30f8f8cSSatish Balay #define CHUNKSIZE 10 101*a30f8f8cSSatish Balay 102*a30f8f8cSSatish Balay #define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \ 103*a30f8f8cSSatish Balay { \ 104*a30f8f8cSSatish Balay \ 105*a30f8f8cSSatish Balay brow = row/bs; \ 106*a30f8f8cSSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 107*a30f8f8cSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 108*a30f8f8cSSatish Balay bcol = col/bs; \ 109*a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 110*a30f8f8cSSatish Balay low = 0; high = nrow; \ 111*a30f8f8cSSatish Balay while (high-low > 3) { \ 112*a30f8f8cSSatish Balay t = (low+high)/2; \ 113*a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 114*a30f8f8cSSatish Balay else low = t; \ 115*a30f8f8cSSatish Balay } \ 116*a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 117*a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 118*a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 119*a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 120*a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 121*a30f8f8cSSatish Balay else *bap = value; \ 122*a30f8f8cSSatish Balay goto a_noinsert; \ 123*a30f8f8cSSatish Balay } \ 124*a30f8f8cSSatish Balay } \ 125*a30f8f8cSSatish Balay if (a->nonew == 1) goto a_noinsert; \ 126*a30f8f8cSSatish Balay else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 127*a30f8f8cSSatish Balay if (nrow >= rmax) { \ 128*a30f8f8cSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 129*a30f8f8cSSatish Balay int new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 130*a30f8f8cSSatish Balay MatScalar *new_a; \ 131*a30f8f8cSSatish Balay \ 132*a30f8f8cSSatish Balay if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 133*a30f8f8cSSatish Balay \ 134*a30f8f8cSSatish Balay /* malloc new storage space */ \ 135*a30f8f8cSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \ 136*a30f8f8cSSatish Balay new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 137*a30f8f8cSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 138*a30f8f8cSSatish Balay new_i = new_j + new_nz; \ 139*a30f8f8cSSatish Balay \ 140*a30f8f8cSSatish Balay /* copy over old data into new slots */ \ 141*a30f8f8cSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \ 142*a30f8f8cSSatish Balay for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \ 143*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 144*a30f8f8cSSatish Balay len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \ 145*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 146*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 147*a30f8f8cSSatish Balay ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \ 148*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \ 149*a30f8f8cSSatish Balay aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 150*a30f8f8cSSatish Balay /* free up old matrix storage */ \ 151*a30f8f8cSSatish Balay ierr = PetscFree(a->a);CHKERRQ(ierr); \ 152*a30f8f8cSSatish Balay if (!a->singlemalloc) { \ 153*a30f8f8cSSatish Balay ierr = PetscFree(a->i);CHKERRQ(ierr); \ 154*a30f8f8cSSatish Balay ierr = PetscFree(a->j);CHKERRQ(ierr);\ 155*a30f8f8cSSatish Balay } \ 156*a30f8f8cSSatish Balay aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j; \ 157*a30f8f8cSSatish Balay a->singlemalloc = PETSC_TRUE; \ 158*a30f8f8cSSatish Balay \ 159*a30f8f8cSSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 160*a30f8f8cSSatish Balay rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \ 161*a30f8f8cSSatish Balay PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 162*a30f8f8cSSatish Balay a->s_maxnz += bs2*CHUNKSIZE; \ 163*a30f8f8cSSatish Balay a->reallocs++; \ 164*a30f8f8cSSatish Balay a->s_nz++; \ 165*a30f8f8cSSatish Balay } \ 166*a30f8f8cSSatish Balay N = nrow++ - 1; \ 167*a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 168*a30f8f8cSSatish Balay for (ii=N; ii>=_i; ii--) { \ 169*a30f8f8cSSatish Balay rp[ii+1] = rp[ii]; \ 170*a30f8f8cSSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 171*a30f8f8cSSatish Balay } \ 172*a30f8f8cSSatish Balay if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 173*a30f8f8cSSatish Balay rp[_i] = bcol; \ 174*a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 175*a30f8f8cSSatish Balay a_noinsert:; \ 176*a30f8f8cSSatish Balay ailen[brow] = nrow; \ 177*a30f8f8cSSatish Balay } 178*a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private 179*a30f8f8cSSatish Balay #define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \ 180*a30f8f8cSSatish Balay { \ 181*a30f8f8cSSatish Balay brow = row/bs; \ 182*a30f8f8cSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 183*a30f8f8cSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 184*a30f8f8cSSatish Balay bcol = col/bs; \ 185*a30f8f8cSSatish Balay ridx = row % bs; cidx = col % bs; \ 186*a30f8f8cSSatish Balay low = 0; high = nrow; \ 187*a30f8f8cSSatish Balay while (high-low > 3) { \ 188*a30f8f8cSSatish Balay t = (low+high)/2; \ 189*a30f8f8cSSatish Balay if (rp[t] > bcol) high = t; \ 190*a30f8f8cSSatish Balay else low = t; \ 191*a30f8f8cSSatish Balay } \ 192*a30f8f8cSSatish Balay for (_i=low; _i<high; _i++) { \ 193*a30f8f8cSSatish Balay if (rp[_i] > bcol) break; \ 194*a30f8f8cSSatish Balay if (rp[_i] == bcol) { \ 195*a30f8f8cSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 196*a30f8f8cSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 197*a30f8f8cSSatish Balay else *bap = value; \ 198*a30f8f8cSSatish Balay goto b_noinsert; \ 199*a30f8f8cSSatish Balay } \ 200*a30f8f8cSSatish Balay } \ 201*a30f8f8cSSatish Balay if (b->nonew == 1) goto b_noinsert; \ 202*a30f8f8cSSatish Balay else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \ 203*a30f8f8cSSatish Balay if (nrow >= rmax) { \ 204*a30f8f8cSSatish Balay /* there is no extra room in row, therefore enlarge */ \ 205*a30f8f8cSSatish Balay int new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \ 206*a30f8f8cSSatish Balay MatScalar *new_a; \ 207*a30f8f8cSSatish Balay \ 208*a30f8f8cSSatish Balay if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \ 209*a30f8f8cSSatish Balay \ 210*a30f8f8cSSatish Balay /* malloc new storage space */ \ 211*a30f8f8cSSatish Balay len = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \ 212*a30f8f8cSSatish Balay new_a = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \ 213*a30f8f8cSSatish Balay new_j = (int*)(new_a + bs2*new_nz); \ 214*a30f8f8cSSatish Balay new_i = new_j + new_nz; \ 215*a30f8f8cSSatish Balay \ 216*a30f8f8cSSatish Balay /* copy over old data into new slots */ \ 217*a30f8f8cSSatish Balay for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \ 218*a30f8f8cSSatish Balay for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \ 219*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \ 220*a30f8f8cSSatish Balay len = (new_nz - CHUNKSIZE - bi[brow] - nrow); \ 221*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \ 222*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 223*a30f8f8cSSatish Balay ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \ 224*a30f8f8cSSatish Balay ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \ 225*a30f8f8cSSatish Balay ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr); \ 226*a30f8f8cSSatish Balay /* free up old matrix storage */ \ 227*a30f8f8cSSatish Balay ierr = PetscFree(b->a);CHKERRQ(ierr); \ 228*a30f8f8cSSatish Balay if (!b->singlemalloc) { \ 229*a30f8f8cSSatish Balay ierr = PetscFree(b->i);CHKERRQ(ierr); \ 230*a30f8f8cSSatish Balay ierr = PetscFree(b->j);CHKERRQ(ierr); \ 231*a30f8f8cSSatish Balay } \ 232*a30f8f8cSSatish Balay ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j; \ 233*a30f8f8cSSatish Balay b->singlemalloc = PETSC_TRUE; \ 234*a30f8f8cSSatish Balay \ 235*a30f8f8cSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 236*a30f8f8cSSatish Balay rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \ 237*a30f8f8cSSatish Balay PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \ 238*a30f8f8cSSatish Balay b->maxnz += bs2*CHUNKSIZE; \ 239*a30f8f8cSSatish Balay b->reallocs++; \ 240*a30f8f8cSSatish Balay b->nz++; \ 241*a30f8f8cSSatish Balay } \ 242*a30f8f8cSSatish Balay N = nrow++ - 1; \ 243*a30f8f8cSSatish Balay /* shift up all the later entries in this row */ \ 244*a30f8f8cSSatish Balay for (ii=N; ii>=_i; ii--) { \ 245*a30f8f8cSSatish Balay rp[ii+1] = rp[ii]; \ 246*a30f8f8cSSatish Balay ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 247*a30f8f8cSSatish Balay } \ 248*a30f8f8cSSatish Balay if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 249*a30f8f8cSSatish Balay rp[_i] = bcol; \ 250*a30f8f8cSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 251*a30f8f8cSSatish Balay b_noinsert:; \ 252*a30f8f8cSSatish Balay bilen[brow] = nrow; \ 253*a30f8f8cSSatish Balay } 254*a30f8f8cSSatish Balay #endif 255*a30f8f8cSSatish Balay 256*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 257*a30f8f8cSSatish Balay #undef __FUNC__ 258*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ" 259*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 260*a30f8f8cSSatish Balay { 261*a30f8f8cSSatish Balay Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data; 262*a30f8f8cSSatish Balay int ierr,i,N = m*n; 263*a30f8f8cSSatish Balay MatScalar *vsingle; 264*a30f8f8cSSatish Balay 265*a30f8f8cSSatish Balay PetscFunctionBegin; 266*a30f8f8cSSatish Balay if (N > b->setvalueslen) { 267*a30f8f8cSSatish Balay if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 268*a30f8f8cSSatish Balay b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 269*a30f8f8cSSatish Balay b->setvalueslen = N; 270*a30f8f8cSSatish Balay } 271*a30f8f8cSSatish Balay vsingle = b->setvaluescopy; 272*a30f8f8cSSatish Balay 273*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 274*a30f8f8cSSatish Balay vsingle[i] = v[i]; 275*a30f8f8cSSatish Balay } 276*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 277*a30f8f8cSSatish Balay PetscFunctionReturn(0); 278*a30f8f8cSSatish Balay } 279*a30f8f8cSSatish Balay 280*a30f8f8cSSatish Balay #undef __FUNC__ 281*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 282*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 283*a30f8f8cSSatish Balay { 284*a30f8f8cSSatish Balay Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 285*a30f8f8cSSatish Balay int ierr,i,N = m*n*b->bs2; 286*a30f8f8cSSatish Balay MatScalar *vsingle; 287*a30f8f8cSSatish Balay 288*a30f8f8cSSatish Balay PetscFunctionBegin; 289*a30f8f8cSSatish Balay if (N > b->setvalueslen) { 290*a30f8f8cSSatish Balay if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 291*a30f8f8cSSatish Balay b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 292*a30f8f8cSSatish Balay b->setvalueslen = N; 293*a30f8f8cSSatish Balay } 294*a30f8f8cSSatish Balay vsingle = b->setvaluescopy; 295*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 296*a30f8f8cSSatish Balay vsingle[i] = v[i]; 297*a30f8f8cSSatish Balay } 298*a30f8f8cSSatish Balay ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 299*a30f8f8cSSatish Balay PetscFunctionReturn(0); 300*a30f8f8cSSatish Balay } 301*a30f8f8cSSatish Balay 302*a30f8f8cSSatish Balay #undef __FUNC__ 303*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT" 304*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 305*a30f8f8cSSatish Balay { 306*a30f8f8cSSatish Balay Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 307*a30f8f8cSSatish Balay int ierr,i,N = m*n; 308*a30f8f8cSSatish Balay MatScalar *vsingle; 309*a30f8f8cSSatish Balay 310*a30f8f8cSSatish Balay PetscFunctionBegin; 311*a30f8f8cSSatish Balay if (N > b->setvalueslen) { 312*a30f8f8cSSatish Balay if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 313*a30f8f8cSSatish Balay b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 314*a30f8f8cSSatish Balay b->setvalueslen = N; 315*a30f8f8cSSatish Balay } 316*a30f8f8cSSatish Balay vsingle = b->setvaluescopy; 317*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 318*a30f8f8cSSatish Balay vsingle[i] = v[i]; 319*a30f8f8cSSatish Balay } 320*a30f8f8cSSatish Balay ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 321*a30f8f8cSSatish Balay PetscFunctionReturn(0); 322*a30f8f8cSSatish Balay } 323*a30f8f8cSSatish Balay 324*a30f8f8cSSatish Balay #undef __FUNC__ 325*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT" 326*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv) 327*a30f8f8cSSatish Balay { 328*a30f8f8cSSatish Balay Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data; 329*a30f8f8cSSatish Balay int ierr,i,N = m*n*b->bs2; 330*a30f8f8cSSatish Balay MatScalar *vsingle; 331*a30f8f8cSSatish Balay 332*a30f8f8cSSatish Balay PetscFunctionBegin; 333*a30f8f8cSSatish Balay if (N > b->setvalueslen) { 334*a30f8f8cSSatish Balay if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);} 335*a30f8f8cSSatish Balay b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy); 336*a30f8f8cSSatish Balay b->setvalueslen = N; 337*a30f8f8cSSatish Balay } 338*a30f8f8cSSatish Balay vsingle = b->setvaluescopy; 339*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 340*a30f8f8cSSatish Balay vsingle[i] = v[i]; 341*a30f8f8cSSatish Balay } 342*a30f8f8cSSatish Balay ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr); 343*a30f8f8cSSatish Balay PetscFunctionReturn(0); 344*a30f8f8cSSatish Balay } 345*a30f8f8cSSatish Balay #endif 346*a30f8f8cSSatish Balay 347*a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks). 348*a30f8f8cSSatish Balay Any a(i,j) with i>j input by user is ingored. 349*a30f8f8cSSatish Balay */ 350*a30f8f8cSSatish Balay #undef __FUNC__ 351*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ" 352*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 353*a30f8f8cSSatish Balay { 354*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 355*a30f8f8cSSatish Balay MatScalar value; 356*a30f8f8cSSatish Balay int ierr,i,j,row,col; 357*a30f8f8cSSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 358*a30f8f8cSSatish Balay int rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs; 359*a30f8f8cSSatish Balay int cend_orig=baij->cend_bs,bs=baij->bs; 360*a30f8f8cSSatish Balay 361*a30f8f8cSSatish Balay /* Some Variables required in the macro */ 362*a30f8f8cSSatish Balay Mat A = baij->A; 363*a30f8f8cSSatish Balay Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data; 364*a30f8f8cSSatish Balay int *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 365*a30f8f8cSSatish Balay MatScalar *aa=a->a; 366*a30f8f8cSSatish Balay 367*a30f8f8cSSatish Balay Mat B = baij->B; 368*a30f8f8cSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 369*a30f8f8cSSatish Balay int *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 370*a30f8f8cSSatish Balay MatScalar *ba=b->a; 371*a30f8f8cSSatish Balay 372*a30f8f8cSSatish Balay int *rp,ii,nrow,_i,rmax,N,brow,bcol; 373*a30f8f8cSSatish Balay int low,high,t,ridx,cidx,bs2=a->bs2; 374*a30f8f8cSSatish Balay MatScalar *ap,*bap; 375*a30f8f8cSSatish Balay 376*a30f8f8cSSatish Balay /* for stash */ 377*a30f8f8cSSatish Balay int n_loc, *in_loc; 378*a30f8f8cSSatish Balay MatScalar *v_loc; 379*a30f8f8cSSatish Balay 380*a30f8f8cSSatish Balay PetscFunctionBegin; 381*a30f8f8cSSatish Balay 382*a30f8f8cSSatish Balay if(!baij->donotstash){ 383*a30f8f8cSSatish Balay in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc); 384*a30f8f8cSSatish Balay v_loc = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc); 385*a30f8f8cSSatish Balay } 386*a30f8f8cSSatish Balay 387*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 388*a30f8f8cSSatish Balay if (im[i] < 0) continue; 389*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 390*a30f8f8cSSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 391*a30f8f8cSSatish Balay #endif 392*a30f8f8cSSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */ 393*a30f8f8cSSatish Balay row = im[i] - rstart_orig; /* local row index */ 394*a30f8f8cSSatish Balay for (j=0; j<n; j++) { 395*a30f8f8cSSatish Balay if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */ 396*a30f8f8cSSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig){ /* diag entry (A) */ 397*a30f8f8cSSatish Balay col = in[j] - cstart_orig; /* local col index */ 398*a30f8f8cSSatish Balay brow = row/bs; bcol = col/bs; 399*a30f8f8cSSatish Balay if (brow > bcol) continue; /* ignore lower triangular blocks of A */ 400*a30f8f8cSSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 401*a30f8f8cSSatish Balay MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv); 402*a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 403*a30f8f8cSSatish Balay } else if (in[j] < 0) continue; 404*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 405*a30f8f8cSSatish Balay else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");} 406*a30f8f8cSSatish Balay #endif 407*a30f8f8cSSatish Balay else { /* off-diag entry (B) */ 408*a30f8f8cSSatish Balay if (mat->was_assembled) { 409*a30f8f8cSSatish Balay if (!baij->colmap) { 410*a30f8f8cSSatish Balay ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 411*a30f8f8cSSatish Balay } 412*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 413*a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 414*a30f8f8cSSatish Balay col = col - 1 + in[j]%bs; 415*a30f8f8cSSatish Balay #else 416*a30f8f8cSSatish Balay col = baij->colmap[in[j]/bs] - 1 + in[j]%bs; 417*a30f8f8cSSatish Balay #endif 418*a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) { 419*a30f8f8cSSatish Balay ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr); 420*a30f8f8cSSatish Balay col = in[j]; 421*a30f8f8cSSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 422*a30f8f8cSSatish Balay B = baij->B; 423*a30f8f8cSSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 424*a30f8f8cSSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 425*a30f8f8cSSatish Balay ba=b->a; 426*a30f8f8cSSatish Balay } 427*a30f8f8cSSatish Balay } else col = in[j]; 428*a30f8f8cSSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 429*a30f8f8cSSatish Balay MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv); 430*a30f8f8cSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 431*a30f8f8cSSatish Balay } 432*a30f8f8cSSatish Balay } 433*a30f8f8cSSatish Balay } else { /* off processor entry */ 434*a30f8f8cSSatish Balay if (!baij->donotstash) { 435*a30f8f8cSSatish Balay n_loc = 0; 436*a30f8f8cSSatish Balay for (j=0; j<n; j++){ 437*a30f8f8cSSatish Balay if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */ 438*a30f8f8cSSatish Balay in_loc[n_loc] = in[j]; 439*a30f8f8cSSatish Balay if (roworiented) { 440*a30f8f8cSSatish Balay v_loc[n_loc] = v[i*n+j]; 441*a30f8f8cSSatish Balay } else { 442*a30f8f8cSSatish Balay v_loc[n_loc] = v[j*m+i]; 443*a30f8f8cSSatish Balay } 444*a30f8f8cSSatish Balay n_loc++; 445*a30f8f8cSSatish Balay } 446*a30f8f8cSSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr); 447*a30f8f8cSSatish Balay } 448*a30f8f8cSSatish Balay } 449*a30f8f8cSSatish Balay } 450*a30f8f8cSSatish Balay 451*a30f8f8cSSatish Balay if(!baij->donotstash){ 452*a30f8f8cSSatish Balay ierr = PetscFree(in_loc);CHKERRQ(ierr); 453*a30f8f8cSSatish Balay ierr = PetscFree(v_loc);CHKERRQ(ierr); 454*a30f8f8cSSatish Balay } 455*a30f8f8cSSatish Balay PetscFunctionReturn(0); 456*a30f8f8cSSatish Balay } 457*a30f8f8cSSatish Balay 458*a30f8f8cSSatish Balay #undef __FUNC__ 459*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ" 460*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 461*a30f8f8cSSatish Balay { 462*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 463*a30f8f8cSSatish Balay MatScalar *value,*barray=baij->barray; 464*a30f8f8cSSatish Balay int ierr,i,j,ii,jj,row,col; 465*a30f8f8cSSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 466*a30f8f8cSSatish Balay int rend=baij->rend,cstart=baij->cstart,stepval; 467*a30f8f8cSSatish Balay int cend=baij->cend,bs=baij->bs,bs2=baij->bs2; 468*a30f8f8cSSatish Balay 469*a30f8f8cSSatish Balay PetscFunctionBegin; 470*a30f8f8cSSatish Balay if(!barray) { 471*a30f8f8cSSatish Balay baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray); 472*a30f8f8cSSatish Balay } 473*a30f8f8cSSatish Balay 474*a30f8f8cSSatish Balay if (roworiented) { 475*a30f8f8cSSatish Balay stepval = (n-1)*bs; 476*a30f8f8cSSatish Balay } else { 477*a30f8f8cSSatish Balay stepval = (m-1)*bs; 478*a30f8f8cSSatish Balay } 479*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 480*a30f8f8cSSatish Balay if (im[i] < 0) continue; 481*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 482*a30f8f8cSSatish Balay if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs); 483*a30f8f8cSSatish Balay #endif 484*a30f8f8cSSatish Balay if (im[i] >= rstart && im[i] < rend) { 485*a30f8f8cSSatish Balay row = im[i] - rstart; 486*a30f8f8cSSatish Balay for (j=0; j<n; j++) { 487*a30f8f8cSSatish Balay /* If NumCol = 1 then a copy is not required */ 488*a30f8f8cSSatish Balay if ((roworiented) && (n == 1)) { 489*a30f8f8cSSatish Balay barray = v + i*bs2; 490*a30f8f8cSSatish Balay } else if((!roworiented) && (m == 1)) { 491*a30f8f8cSSatish Balay barray = v + j*bs2; 492*a30f8f8cSSatish Balay } else { /* Here a copy is required */ 493*a30f8f8cSSatish Balay if (roworiented) { 494*a30f8f8cSSatish Balay value = v + i*(stepval+bs)*bs + j*bs; 495*a30f8f8cSSatish Balay } else { 496*a30f8f8cSSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 497*a30f8f8cSSatish Balay } 498*a30f8f8cSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 499*a30f8f8cSSatish Balay for (jj=0; jj<bs; jj++) { 500*a30f8f8cSSatish Balay *barray++ = *value++; 501*a30f8f8cSSatish Balay } 502*a30f8f8cSSatish Balay } 503*a30f8f8cSSatish Balay barray -=bs2; 504*a30f8f8cSSatish Balay } 505*a30f8f8cSSatish Balay 506*a30f8f8cSSatish Balay if (in[j] >= cstart && in[j] < cend){ 507*a30f8f8cSSatish Balay col = in[j] - cstart; 508*a30f8f8cSSatish Balay ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 509*a30f8f8cSSatish Balay } 510*a30f8f8cSSatish Balay else if (in[j] < 0) continue; 511*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 512*a30f8f8cSSatish Balay else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);} 513*a30f8f8cSSatish Balay #endif 514*a30f8f8cSSatish Balay else { 515*a30f8f8cSSatish Balay if (mat->was_assembled) { 516*a30f8f8cSSatish Balay if (!baij->colmap) { 517*a30f8f8cSSatish Balay ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 518*a30f8f8cSSatish Balay } 519*a30f8f8cSSatish Balay 520*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 521*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 522*a30f8f8cSSatish Balay { int data; 523*a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 524*a30f8f8cSSatish Balay if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 525*a30f8f8cSSatish Balay } 526*a30f8f8cSSatish Balay #else 527*a30f8f8cSSatish Balay if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap"); 528*a30f8f8cSSatish Balay #endif 529*a30f8f8cSSatish Balay #endif 530*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 531*a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 532*a30f8f8cSSatish Balay col = (col - 1)/bs; 533*a30f8f8cSSatish Balay #else 534*a30f8f8cSSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 535*a30f8f8cSSatish Balay #endif 536*a30f8f8cSSatish Balay if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 537*a30f8f8cSSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 538*a30f8f8cSSatish Balay col = in[j]; 539*a30f8f8cSSatish Balay } 540*a30f8f8cSSatish Balay } 541*a30f8f8cSSatish Balay else col = in[j]; 542*a30f8f8cSSatish Balay ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 543*a30f8f8cSSatish Balay } 544*a30f8f8cSSatish Balay } 545*a30f8f8cSSatish Balay } else { 546*a30f8f8cSSatish Balay if (!baij->donotstash) { 547*a30f8f8cSSatish Balay if (roworiented) { 548*a30f8f8cSSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 549*a30f8f8cSSatish Balay } else { 550*a30f8f8cSSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 551*a30f8f8cSSatish Balay } 552*a30f8f8cSSatish Balay } 553*a30f8f8cSSatish Balay } 554*a30f8f8cSSatish Balay } 555*a30f8f8cSSatish Balay PetscFunctionReturn(0); 556*a30f8f8cSSatish Balay } 557*a30f8f8cSSatish Balay 558*a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887 559*a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp))) 560*a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 561*a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */ 562*a30f8f8cSSatish Balay #undef __FUNC__ 563*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar" 564*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 565*a30f8f8cSSatish Balay { 566*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 567*a30f8f8cSSatish Balay int ierr,i,j,row,col; 568*a30f8f8cSSatish Balay int roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ; 569*a30f8f8cSSatish Balay int rend_orig=baij->rend_bs,Nbs=baij->Nbs; 570*a30f8f8cSSatish Balay int h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx; 571*a30f8f8cSSatish Balay PetscReal tmp; 572*a30f8f8cSSatish Balay MatScalar **HD = baij->hd,value; 573*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 574*a30f8f8cSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 575*a30f8f8cSSatish Balay #endif 576*a30f8f8cSSatish Balay 577*a30f8f8cSSatish Balay PetscFunctionBegin; 578*a30f8f8cSSatish Balay 579*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 580*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 581*a30f8f8cSSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 582*a30f8f8cSSatish Balay if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 583*a30f8f8cSSatish Balay #endif 584*a30f8f8cSSatish Balay row = im[i]; 585*a30f8f8cSSatish Balay if (row >= rstart_orig && row < rend_orig) { 586*a30f8f8cSSatish Balay for (j=0; j<n; j++) { 587*a30f8f8cSSatish Balay col = in[j]; 588*a30f8f8cSSatish Balay if (roworiented) value = v[i*n+j]; else value = v[i+j*m]; 589*a30f8f8cSSatish Balay /* Look up into the Hash Table */ 590*a30f8f8cSSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 591*a30f8f8cSSatish Balay h1 = HASH(size,key,tmp); 592*a30f8f8cSSatish Balay 593*a30f8f8cSSatish Balay 594*a30f8f8cSSatish Balay idx = h1; 595*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 596*a30f8f8cSSatish Balay insert_ct++; 597*a30f8f8cSSatish Balay total_ct++; 598*a30f8f8cSSatish Balay if (HT[idx] != key) { 599*a30f8f8cSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 600*a30f8f8cSSatish Balay if (idx == size) { 601*a30f8f8cSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 602*a30f8f8cSSatish Balay if (idx == h1) { 603*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 604*a30f8f8cSSatish Balay } 605*a30f8f8cSSatish Balay } 606*a30f8f8cSSatish Balay } 607*a30f8f8cSSatish Balay #else 608*a30f8f8cSSatish Balay if (HT[idx] != key) { 609*a30f8f8cSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 610*a30f8f8cSSatish Balay if (idx == size) { 611*a30f8f8cSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 612*a30f8f8cSSatish Balay if (idx == h1) { 613*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 614*a30f8f8cSSatish Balay } 615*a30f8f8cSSatish Balay } 616*a30f8f8cSSatish Balay } 617*a30f8f8cSSatish Balay #endif 618*a30f8f8cSSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 619*a30f8f8cSSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 620*a30f8f8cSSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 621*a30f8f8cSSatish Balay } 622*a30f8f8cSSatish Balay } else { 623*a30f8f8cSSatish Balay if (!baij->donotstash) { 624*a30f8f8cSSatish Balay if (roworiented) { 625*a30f8f8cSSatish Balay ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr); 626*a30f8f8cSSatish Balay } else { 627*a30f8f8cSSatish Balay ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr); 628*a30f8f8cSSatish Balay } 629*a30f8f8cSSatish Balay } 630*a30f8f8cSSatish Balay } 631*a30f8f8cSSatish Balay } 632*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 633*a30f8f8cSSatish Balay baij->ht_total_ct = total_ct; 634*a30f8f8cSSatish Balay baij->ht_insert_ct = insert_ct; 635*a30f8f8cSSatish Balay #endif 636*a30f8f8cSSatish Balay PetscFunctionReturn(0); 637*a30f8f8cSSatish Balay } 638*a30f8f8cSSatish Balay 639*a30f8f8cSSatish Balay #undef __FUNC__ 640*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar" 641*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv) 642*a30f8f8cSSatish Balay { 643*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 644*a30f8f8cSSatish Balay int ierr,i,j,ii,jj,row,col; 645*a30f8f8cSSatish Balay int roworiented = baij->roworiented,rstart=baij->rstart ; 646*a30f8f8cSSatish Balay int rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2; 647*a30f8f8cSSatish Balay int h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 648*a30f8f8cSSatish Balay PetscReal tmp; 649*a30f8f8cSSatish Balay MatScalar **HD = baij->hd,*baij_a; 650*a30f8f8cSSatish Balay MatScalar *v_t,*value; 651*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 652*a30f8f8cSSatish Balay int total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 653*a30f8f8cSSatish Balay #endif 654*a30f8f8cSSatish Balay 655*a30f8f8cSSatish Balay PetscFunctionBegin; 656*a30f8f8cSSatish Balay 657*a30f8f8cSSatish Balay if (roworiented) { 658*a30f8f8cSSatish Balay stepval = (n-1)*bs; 659*a30f8f8cSSatish Balay } else { 660*a30f8f8cSSatish Balay stepval = (m-1)*bs; 661*a30f8f8cSSatish Balay } 662*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 663*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 664*a30f8f8cSSatish Balay if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 665*a30f8f8cSSatish Balay if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 666*a30f8f8cSSatish Balay #endif 667*a30f8f8cSSatish Balay row = im[i]; 668*a30f8f8cSSatish Balay v_t = v + i*bs2; 669*a30f8f8cSSatish Balay if (row >= rstart && row < rend) { 670*a30f8f8cSSatish Balay for (j=0; j<n; j++) { 671*a30f8f8cSSatish Balay col = in[j]; 672*a30f8f8cSSatish Balay 673*a30f8f8cSSatish Balay /* Look up into the Hash Table */ 674*a30f8f8cSSatish Balay key = row*Nbs+col+1; 675*a30f8f8cSSatish Balay h1 = HASH(size,key,tmp); 676*a30f8f8cSSatish Balay 677*a30f8f8cSSatish Balay idx = h1; 678*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 679*a30f8f8cSSatish Balay total_ct++; 680*a30f8f8cSSatish Balay insert_ct++; 681*a30f8f8cSSatish Balay if (HT[idx] != key) { 682*a30f8f8cSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++); 683*a30f8f8cSSatish Balay if (idx == size) { 684*a30f8f8cSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++); 685*a30f8f8cSSatish Balay if (idx == h1) { 686*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 687*a30f8f8cSSatish Balay } 688*a30f8f8cSSatish Balay } 689*a30f8f8cSSatish Balay } 690*a30f8f8cSSatish Balay #else 691*a30f8f8cSSatish Balay if (HT[idx] != key) { 692*a30f8f8cSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++); 693*a30f8f8cSSatish Balay if (idx == size) { 694*a30f8f8cSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++); 695*a30f8f8cSSatish Balay if (idx == h1) { 696*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table"); 697*a30f8f8cSSatish Balay } 698*a30f8f8cSSatish Balay } 699*a30f8f8cSSatish Balay } 700*a30f8f8cSSatish Balay #endif 701*a30f8f8cSSatish Balay baij_a = HD[idx]; 702*a30f8f8cSSatish Balay if (roworiented) { 703*a30f8f8cSSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 704*a30f8f8cSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 705*a30f8f8cSSatish Balay value = v_t; 706*a30f8f8cSSatish Balay v_t += bs; 707*a30f8f8cSSatish Balay if (addv == ADD_VALUES) { 708*a30f8f8cSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 709*a30f8f8cSSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 710*a30f8f8cSSatish Balay baij_a[jj] += *value++; 711*a30f8f8cSSatish Balay } 712*a30f8f8cSSatish Balay } 713*a30f8f8cSSatish Balay } else { 714*a30f8f8cSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 715*a30f8f8cSSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 716*a30f8f8cSSatish Balay baij_a[jj] = *value++; 717*a30f8f8cSSatish Balay } 718*a30f8f8cSSatish Balay } 719*a30f8f8cSSatish Balay } 720*a30f8f8cSSatish Balay } else { 721*a30f8f8cSSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 722*a30f8f8cSSatish Balay if (addv == ADD_VALUES) { 723*a30f8f8cSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 724*a30f8f8cSSatish Balay for (jj=0; jj<bs; jj++) { 725*a30f8f8cSSatish Balay baij_a[jj] += *value++; 726*a30f8f8cSSatish Balay } 727*a30f8f8cSSatish Balay } 728*a30f8f8cSSatish Balay } else { 729*a30f8f8cSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 730*a30f8f8cSSatish Balay for (jj=0; jj<bs; jj++) { 731*a30f8f8cSSatish Balay baij_a[jj] = *value++; 732*a30f8f8cSSatish Balay } 733*a30f8f8cSSatish Balay } 734*a30f8f8cSSatish Balay } 735*a30f8f8cSSatish Balay } 736*a30f8f8cSSatish Balay } 737*a30f8f8cSSatish Balay } else { 738*a30f8f8cSSatish Balay if (!baij->donotstash) { 739*a30f8f8cSSatish Balay if (roworiented) { 740*a30f8f8cSSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 741*a30f8f8cSSatish Balay } else { 742*a30f8f8cSSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 743*a30f8f8cSSatish Balay } 744*a30f8f8cSSatish Balay } 745*a30f8f8cSSatish Balay } 746*a30f8f8cSSatish Balay } 747*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 748*a30f8f8cSSatish Balay baij->ht_total_ct = total_ct; 749*a30f8f8cSSatish Balay baij->ht_insert_ct = insert_ct; 750*a30f8f8cSSatish Balay #endif 751*a30f8f8cSSatish Balay PetscFunctionReturn(0); 752*a30f8f8cSSatish Balay } 753*a30f8f8cSSatish Balay 754*a30f8f8cSSatish Balay #undef __FUNC__ 755*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ" 756*a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v) 757*a30f8f8cSSatish Balay { 758*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 759*a30f8f8cSSatish Balay int bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs; 760*a30f8f8cSSatish Balay int bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data; 761*a30f8f8cSSatish Balay 762*a30f8f8cSSatish Balay PetscFunctionBegin; 763*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 764*a30f8f8cSSatish Balay if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row"); 765*a30f8f8cSSatish Balay if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large"); 766*a30f8f8cSSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 767*a30f8f8cSSatish Balay row = idxm[i] - bsrstart; 768*a30f8f8cSSatish Balay for (j=0; j<n; j++) { 769*a30f8f8cSSatish Balay if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column"); 770*a30f8f8cSSatish Balay if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large"); 771*a30f8f8cSSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend){ 772*a30f8f8cSSatish Balay col = idxn[j] - bscstart; 773*a30f8f8cSSatish Balay ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774*a30f8f8cSSatish Balay } else { 775*a30f8f8cSSatish Balay if (!baij->colmap) { 776*a30f8f8cSSatish Balay ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr); 777*a30f8f8cSSatish Balay } 778*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 779*a30f8f8cSSatish Balay ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 780*a30f8f8cSSatish Balay data --; 781*a30f8f8cSSatish Balay #else 782*a30f8f8cSSatish Balay data = baij->colmap[idxn[j]/bs]-1; 783*a30f8f8cSSatish Balay #endif 784*a30f8f8cSSatish Balay if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 785*a30f8f8cSSatish Balay else { 786*a30f8f8cSSatish Balay col = data + idxn[j]%bs; 787*a30f8f8cSSatish Balay ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 788*a30f8f8cSSatish Balay } 789*a30f8f8cSSatish Balay } 790*a30f8f8cSSatish Balay } 791*a30f8f8cSSatish Balay } else { 792*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported"); 793*a30f8f8cSSatish Balay } 794*a30f8f8cSSatish Balay } 795*a30f8f8cSSatish Balay PetscFunctionReturn(0); 796*a30f8f8cSSatish Balay } 797*a30f8f8cSSatish Balay 798*a30f8f8cSSatish Balay #undef __FUNC__ 799*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ" 800*a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm) 801*a30f8f8cSSatish Balay { 802*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 803*a30f8f8cSSatish Balay Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; 804*a30f8f8cSSatish Balay Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ*)baij->B->data; 805*a30f8f8cSSatish Balay int ierr; 806*a30f8f8cSSatish Balay PetscReal sum[2],*lnorm2; 807*a30f8f8cSSatish Balay 808*a30f8f8cSSatish Balay PetscFunctionBegin; 809*a30f8f8cSSatish Balay if (baij->size == 1) { 810*a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr); 811*a30f8f8cSSatish Balay } else { 812*a30f8f8cSSatish Balay if (type == NORM_FROBENIUS) { 813*a30f8f8cSSatish Balay lnorm2 = (Scalar*)PetscMalloc(2*sizeof(Scalar));CHKPTRQ(lnorm2); 814*a30f8f8cSSatish Balay ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr); 815*a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++; /* squar power of norm(A) */ 816*a30f8f8cSSatish Balay ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr); 817*a30f8f8cSSatish Balay *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--; /* squar power of norm(B) */ 818*a30f8f8cSSatish Balay /* 819*a30f8f8cSSatish Balay ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr); 820*a30f8f8cSSatish Balay PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]); 821*a30f8f8cSSatish Balay */ 822*a30f8f8cSSatish Balay ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr); 823*a30f8f8cSSatish Balay /* 824*a30f8f8cSSatish Balay PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]); 825*a30f8f8cSSatish Balay PetscSynchronizedFlush(PETSC_COMM_WORLD); */ 826*a30f8f8cSSatish Balay 827*a30f8f8cSSatish Balay *norm = sqrt(sum[0] + 2*sum[1]); 828*a30f8f8cSSatish Balay ierr = PetscFree(lnorm2);CHKERRQ(ierr); 829*a30f8f8cSSatish Balay } else { 830*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet"); 831*a30f8f8cSSatish Balay } 832*a30f8f8cSSatish Balay } 833*a30f8f8cSSatish Balay PetscFunctionReturn(0); 834*a30f8f8cSSatish Balay } 835*a30f8f8cSSatish Balay 836*a30f8f8cSSatish Balay /* 837*a30f8f8cSSatish Balay Creates the hash table, and sets the table 838*a30f8f8cSSatish Balay This table is created only once. 839*a30f8f8cSSatish Balay If new entried need to be added to the matrix 840*a30f8f8cSSatish Balay then the hash table has to be destroyed and 841*a30f8f8cSSatish Balay recreated. 842*a30f8f8cSSatish Balay */ 843*a30f8f8cSSatish Balay #undef __FUNC__ 844*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private" 845*a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor) 846*a30f8f8cSSatish Balay { 847*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 848*a30f8f8cSSatish Balay Mat A = baij->A,B=baij->B; 849*a30f8f8cSSatish Balay Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data; 850*a30f8f8cSSatish Balay int i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 851*a30f8f8cSSatish Balay int size,bs2=baij->bs2,rstart=baij->rstart,ierr; 852*a30f8f8cSSatish Balay int cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs; 853*a30f8f8cSSatish Balay int *HT,key; 854*a30f8f8cSSatish Balay MatScalar **HD; 855*a30f8f8cSSatish Balay PetscReal tmp; 856*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 857*a30f8f8cSSatish Balay int ct=0,max=0; 858*a30f8f8cSSatish Balay #endif 859*a30f8f8cSSatish Balay 860*a30f8f8cSSatish Balay PetscFunctionBegin; 861*a30f8f8cSSatish Balay baij->ht_size=(int)(factor*nz); 862*a30f8f8cSSatish Balay size = baij->ht_size; 863*a30f8f8cSSatish Balay 864*a30f8f8cSSatish Balay if (baij->ht) { 865*a30f8f8cSSatish Balay PetscFunctionReturn(0); 866*a30f8f8cSSatish Balay } 867*a30f8f8cSSatish Balay 868*a30f8f8cSSatish Balay /* Allocate Memory for Hash Table */ 869*a30f8f8cSSatish Balay baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd); 870*a30f8f8cSSatish Balay baij->ht = (int*)(baij->hd + size); 871*a30f8f8cSSatish Balay HD = baij->hd; 872*a30f8f8cSSatish Balay HT = baij->ht; 873*a30f8f8cSSatish Balay 874*a30f8f8cSSatish Balay 875*a30f8f8cSSatish Balay ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr); 876*a30f8f8cSSatish Balay 877*a30f8f8cSSatish Balay 878*a30f8f8cSSatish Balay /* Loop Over A */ 879*a30f8f8cSSatish Balay for (i=0; i<a->mbs; i++) { 880*a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 881*a30f8f8cSSatish Balay row = i+rstart; 882*a30f8f8cSSatish Balay col = aj[j]+cstart; 883*a30f8f8cSSatish Balay 884*a30f8f8cSSatish Balay key = row*Nbs + col + 1; 885*a30f8f8cSSatish Balay h1 = HASH(size,key,tmp); 886*a30f8f8cSSatish Balay for (k=0; k<size; k++){ 887*a30f8f8cSSatish Balay if (HT[(h1+k)%size] == 0.0) { 888*a30f8f8cSSatish Balay HT[(h1+k)%size] = key; 889*a30f8f8cSSatish Balay HD[(h1+k)%size] = a->a + j*bs2; 890*a30f8f8cSSatish Balay break; 891*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 892*a30f8f8cSSatish Balay } else { 893*a30f8f8cSSatish Balay ct++; 894*a30f8f8cSSatish Balay #endif 895*a30f8f8cSSatish Balay } 896*a30f8f8cSSatish Balay } 897*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 898*a30f8f8cSSatish Balay if (k> max) max = k; 899*a30f8f8cSSatish Balay #endif 900*a30f8f8cSSatish Balay } 901*a30f8f8cSSatish Balay } 902*a30f8f8cSSatish Balay /* Loop Over B */ 903*a30f8f8cSSatish Balay for (i=0; i<b->mbs; i++) { 904*a30f8f8cSSatish Balay for (j=bi[i]; j<bi[i+1]; j++) { 905*a30f8f8cSSatish Balay row = i+rstart; 906*a30f8f8cSSatish Balay col = garray[bj[j]]; 907*a30f8f8cSSatish Balay key = row*Nbs + col + 1; 908*a30f8f8cSSatish Balay h1 = HASH(size,key,tmp); 909*a30f8f8cSSatish Balay for (k=0; k<size; k++){ 910*a30f8f8cSSatish Balay if (HT[(h1+k)%size] == 0.0) { 911*a30f8f8cSSatish Balay HT[(h1+k)%size] = key; 912*a30f8f8cSSatish Balay HD[(h1+k)%size] = b->a + j*bs2; 913*a30f8f8cSSatish Balay break; 914*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 915*a30f8f8cSSatish Balay } else { 916*a30f8f8cSSatish Balay ct++; 917*a30f8f8cSSatish Balay #endif 918*a30f8f8cSSatish Balay } 919*a30f8f8cSSatish Balay } 920*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 921*a30f8f8cSSatish Balay if (k> max) max = k; 922*a30f8f8cSSatish Balay #endif 923*a30f8f8cSSatish Balay } 924*a30f8f8cSSatish Balay } 925*a30f8f8cSSatish Balay 926*a30f8f8cSSatish Balay /* Print Summary */ 927*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 928*a30f8f8cSSatish Balay for (i=0,j=0; i<size; i++) { 929*a30f8f8cSSatish Balay if (HT[i]) {j++;} 930*a30f8f8cSSatish Balay } 931*a30f8f8cSSatish Balay PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max); 932*a30f8f8cSSatish Balay #endif 933*a30f8f8cSSatish Balay PetscFunctionReturn(0); 934*a30f8f8cSSatish Balay } 935*a30f8f8cSSatish Balay 936*a30f8f8cSSatish Balay #undef __FUNC__ 937*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ" 938*a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode) 939*a30f8f8cSSatish Balay { 940*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 941*a30f8f8cSSatish Balay int ierr,nstash,reallocs; 942*a30f8f8cSSatish Balay InsertMode addv; 943*a30f8f8cSSatish Balay 944*a30f8f8cSSatish Balay PetscFunctionBegin; 945*a30f8f8cSSatish Balay if (baij->donotstash) { 946*a30f8f8cSSatish Balay PetscFunctionReturn(0); 947*a30f8f8cSSatish Balay } 948*a30f8f8cSSatish Balay 949*a30f8f8cSSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 950*a30f8f8cSSatish Balay ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr); 951*a30f8f8cSSatish Balay if (addv == (ADD_VALUES|INSERT_VALUES)) { 952*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added"); 953*a30f8f8cSSatish Balay } 954*a30f8f8cSSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 955*a30f8f8cSSatish Balay 956*a30f8f8cSSatish Balay ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr); 957*a30f8f8cSSatish Balay ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr); 958*a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 959*a30f8f8cSSatish Balay PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs); 960*a30f8f8cSSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 961*a30f8f8cSSatish Balay PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs); 962*a30f8f8cSSatish Balay PetscFunctionReturn(0); 963*a30f8f8cSSatish Balay } 964*a30f8f8cSSatish Balay 965*a30f8f8cSSatish Balay #undef __FUNC__ 966*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ" 967*a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode) 968*a30f8f8cSSatish Balay { 969*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data; 970*a30f8f8cSSatish Balay Mat_SeqSBAIJ *a=(Mat_SeqSBAIJ*)baij->A->data; 971*a30f8f8cSSatish Balay Mat_SeqBAIJ *b=(Mat_SeqBAIJ*)baij->B->data; 972*a30f8f8cSSatish Balay int i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2; 973*a30f8f8cSSatish Balay int *row,*col,other_disassembled; 974*a30f8f8cSSatish Balay PetscTruth r1,r2,r3; 975*a30f8f8cSSatish Balay MatScalar *val; 976*a30f8f8cSSatish Balay InsertMode addv = mat->insertmode; 977*a30f8f8cSSatish Balay int rank; 978*a30f8f8cSSatish Balay 979*a30f8f8cSSatish Balay PetscFunctionBegin; 980*a30f8f8cSSatish Balay /* remove 2 line below later */ 981*a30f8f8cSSatish Balay ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 982*a30f8f8cSSatish Balay 983*a30f8f8cSSatish Balay if (!baij->donotstash) { 984*a30f8f8cSSatish Balay while (1) { 985*a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 986*a30f8f8cSSatish Balay /* 987*a30f8f8cSSatish Balay PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg); 988*a30f8f8cSSatish Balay PetscSynchronizedFlush(PETSC_COMM_WORLD); 989*a30f8f8cSSatish Balay */ 990*a30f8f8cSSatish Balay if (!flg) break; 991*a30f8f8cSSatish Balay 992*a30f8f8cSSatish Balay for (i=0; i<n;) { 993*a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 994*a30f8f8cSSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 995*a30f8f8cSSatish Balay if (j < n) ncols = j-i; 996*a30f8f8cSSatish Balay else ncols = n-i; 997*a30f8f8cSSatish Balay /* Now assemble all these values with a single function call */ 998*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 999*a30f8f8cSSatish Balay i = j; 1000*a30f8f8cSSatish Balay } 1001*a30f8f8cSSatish Balay } 1002*a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 1003*a30f8f8cSSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 1004*a30f8f8cSSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 1005*a30f8f8cSSatish Balay restore the original flags */ 1006*a30f8f8cSSatish Balay r1 = baij->roworiented; 1007*a30f8f8cSSatish Balay r2 = a->roworiented; 1008*a30f8f8cSSatish Balay r3 = b->roworiented; 1009*a30f8f8cSSatish Balay baij->roworiented = PETSC_FALSE; 1010*a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 1011*a30f8f8cSSatish Balay b->roworiented = PETSC_FALSE; 1012*a30f8f8cSSatish Balay while (1) { 1013*a30f8f8cSSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 1014*a30f8f8cSSatish Balay if (!flg) break; 1015*a30f8f8cSSatish Balay 1016*a30f8f8cSSatish Balay for (i=0; i<n;) { 1017*a30f8f8cSSatish Balay /* Now identify the consecutive vals belonging to the same row */ 1018*a30f8f8cSSatish Balay for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; } 1019*a30f8f8cSSatish Balay if (j < n) ncols = j-i; 1020*a30f8f8cSSatish Balay else ncols = n-i; 1021*a30f8f8cSSatish Balay ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 1022*a30f8f8cSSatish Balay i = j; 1023*a30f8f8cSSatish Balay } 1024*a30f8f8cSSatish Balay } 1025*a30f8f8cSSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 1026*a30f8f8cSSatish Balay baij->roworiented = r1; 1027*a30f8f8cSSatish Balay a->roworiented = r2; 1028*a30f8f8cSSatish Balay b->roworiented = r3; 1029*a30f8f8cSSatish Balay } 1030*a30f8f8cSSatish Balay 1031*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 1032*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 1033*a30f8f8cSSatish Balay 1034*a30f8f8cSSatish Balay /* determine if any processor has disassembled, if so we must 1035*a30f8f8cSSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 1036*a30f8f8cSSatish Balay /* 1037*a30f8f8cSSatish Balay if nonzero structure of submatrix B cannot change then we know that 1038*a30f8f8cSSatish Balay no processor disassembled thus we can skip this stuff 1039*a30f8f8cSSatish Balay */ 1040*a30f8f8cSSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 1041*a30f8f8cSSatish Balay ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr); 1042*a30f8f8cSSatish Balay if (mat->was_assembled && !other_disassembled) { 1043*a30f8f8cSSatish Balay ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 1044*a30f8f8cSSatish Balay } 1045*a30f8f8cSSatish Balay } 1046*a30f8f8cSSatish Balay 1047*a30f8f8cSSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 1048*a30f8f8cSSatish Balay ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); 1049*a30f8f8cSSatish Balay } 1050*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 1051*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 1052*a30f8f8cSSatish Balay 1053*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g) 1054*a30f8f8cSSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 1055*a30f8f8cSSatish Balay PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct); 1056*a30f8f8cSSatish Balay baij->ht_total_ct = 0; 1057*a30f8f8cSSatish Balay baij->ht_insert_ct = 0; 1058*a30f8f8cSSatish Balay } 1059*a30f8f8cSSatish Balay #endif 1060*a30f8f8cSSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 1061*a30f8f8cSSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 1062*a30f8f8cSSatish Balay mat->ops->setvalues = MatSetValues_MPISBAIJ_HT; 1063*a30f8f8cSSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT; 1064*a30f8f8cSSatish Balay } 1065*a30f8f8cSSatish Balay 1066*a30f8f8cSSatish Balay if (baij->rowvalues) { 1067*a30f8f8cSSatish Balay ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr); 1068*a30f8f8cSSatish Balay baij->rowvalues = 0; 1069*a30f8f8cSSatish Balay } 1070*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1071*a30f8f8cSSatish Balay } 1072*a30f8f8cSSatish Balay 1073*a30f8f8cSSatish Balay #undef __FUNC__ 1074*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket" 1075*a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer) 1076*a30f8f8cSSatish Balay { 1077*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1078*a30f8f8cSSatish Balay int ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank; 1079*a30f8f8cSSatish Balay PetscTruth isascii,isdraw; 1080*a30f8f8cSSatish Balay 1081*a30f8f8cSSatish Balay PetscFunctionBegin; 1082*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1083*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1084*a30f8f8cSSatish Balay if (isascii) { 1085*a30f8f8cSSatish Balay ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1086*a30f8f8cSSatish Balay if (format == VIEWER_FORMAT_ASCII_INFO_LONG) { 1087*a30f8f8cSSatish Balay MatInfo info; 1088*a30f8f8cSSatish Balay ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr); 1089*a30f8f8cSSatish Balay ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1090*a30f8f8cSSatish Balay ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n", 1091*a30f8f8cSSatish Balay rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs, 1092*a30f8f8cSSatish Balay baij->bs,(int)info.memory);CHKERRQ(ierr); 1093*a30f8f8cSSatish Balay ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1094*a30f8f8cSSatish Balay ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1095*a30f8f8cSSatish Balay ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1096*a30f8f8cSSatish Balay ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr); 1097*a30f8f8cSSatish Balay ierr = ViewerFlush(viewer);CHKERRQ(ierr); 1098*a30f8f8cSSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 1099*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1100*a30f8f8cSSatish Balay } else if (format == VIEWER_FORMAT_ASCII_INFO) { 1101*a30f8f8cSSatish Balay ierr = ViewerASCIIPrintf(viewer," block size is %d\n",bs);CHKERRQ(ierr); 1102*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1103*a30f8f8cSSatish Balay } 1104*a30f8f8cSSatish Balay } 1105*a30f8f8cSSatish Balay 1106*a30f8f8cSSatish Balay if (isdraw) { 1107*a30f8f8cSSatish Balay Draw draw; 1108*a30f8f8cSSatish Balay PetscTruth isnull; 1109*a30f8f8cSSatish Balay ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1110*a30f8f8cSSatish Balay ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1111*a30f8f8cSSatish Balay } 1112*a30f8f8cSSatish Balay 1113*a30f8f8cSSatish Balay if (size == 1) { 1114*a30f8f8cSSatish Balay ierr = MatView(baij->A,viewer);CHKERRQ(ierr); 1115*a30f8f8cSSatish Balay } else { 1116*a30f8f8cSSatish Balay /* assemble the entire matrix onto first processor. */ 1117*a30f8f8cSSatish Balay Mat A; 1118*a30f8f8cSSatish Balay Mat_SeqBAIJ *Aloc; 1119*a30f8f8cSSatish Balay int M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 1120*a30f8f8cSSatish Balay MatScalar *a; 1121*a30f8f8cSSatish Balay 1122*a30f8f8cSSatish Balay if (!rank) { 1123*a30f8f8cSSatish Balay ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,M,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1124*a30f8f8cSSatish Balay } else { 1125*a30f8f8cSSatish Balay ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr); 1126*a30f8f8cSSatish Balay } 1127*a30f8f8cSSatish Balay PLogObjectParent(mat,A); 1128*a30f8f8cSSatish Balay 1129*a30f8f8cSSatish Balay /* copy over the A part */ 1130*a30f8f8cSSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 1131*a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1132*a30f8f8cSSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1133*a30f8f8cSSatish Balay 1134*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 1135*a30f8f8cSSatish Balay rvals[0] = bs*(baij->rstart + i); 1136*a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1137*a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1138*a30f8f8cSSatish Balay col = (baij->cstart+aj[j])*bs; 1139*a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 1140*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1141*a30f8f8cSSatish Balay col++; a += bs; 1142*a30f8f8cSSatish Balay } 1143*a30f8f8cSSatish Balay } 1144*a30f8f8cSSatish Balay } 1145*a30f8f8cSSatish Balay /* copy over the B part */ 1146*a30f8f8cSSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 1147*a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1148*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 1149*a30f8f8cSSatish Balay rvals[0] = bs*(baij->rstart + i); 1150*a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1151*a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1152*a30f8f8cSSatish Balay col = baij->garray[aj[j]]*bs; 1153*a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 1154*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1155*a30f8f8cSSatish Balay col++; a += bs; 1156*a30f8f8cSSatish Balay } 1157*a30f8f8cSSatish Balay } 1158*a30f8f8cSSatish Balay } 1159*a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 1160*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1161*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1162*a30f8f8cSSatish Balay /* 1163*a30f8f8cSSatish Balay Everyone has to call to draw the matrix since the graphics waits are 1164*a30f8f8cSSatish Balay synchronized across all processors that share the Draw object 1165*a30f8f8cSSatish Balay */ 1166*a30f8f8cSSatish Balay if (!rank) { 1167*a30f8f8cSSatish Balay Viewer sviewer; 1168*a30f8f8cSSatish Balay ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1169*a30f8f8cSSatish Balay ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1170*a30f8f8cSSatish Balay ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 1171*a30f8f8cSSatish Balay } 1172*a30f8f8cSSatish Balay ierr = MatDestroy(A);CHKERRQ(ierr); 1173*a30f8f8cSSatish Balay } 1174*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1175*a30f8f8cSSatish Balay } 1176*a30f8f8cSSatish Balay 1177*a30f8f8cSSatish Balay #undef __FUNC__ 1178*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ" 1179*a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer) 1180*a30f8f8cSSatish Balay { 1181*a30f8f8cSSatish Balay int ierr; 1182*a30f8f8cSSatish Balay PetscTruth isascii,isdraw,issocket,isbinary; 1183*a30f8f8cSSatish Balay 1184*a30f8f8cSSatish Balay PetscFunctionBegin; 1185*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr); 1186*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr); 1187*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr); 1188*a30f8f8cSSatish Balay ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr); 1189*a30f8f8cSSatish Balay if (isascii || isdraw || issocket || isbinary) { 1190*a30f8f8cSSatish Balay ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1191*a30f8f8cSSatish Balay } else { 1192*a30f8f8cSSatish Balay SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name); 1193*a30f8f8cSSatish Balay } 1194*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1195*a30f8f8cSSatish Balay } 1196*a30f8f8cSSatish Balay 1197*a30f8f8cSSatish Balay #undef __FUNC__ 1198*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ" 1199*a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat) 1200*a30f8f8cSSatish Balay { 1201*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1202*a30f8f8cSSatish Balay int ierr; 1203*a30f8f8cSSatish Balay 1204*a30f8f8cSSatish Balay PetscFunctionBegin; 1205*a30f8f8cSSatish Balay 1206*a30f8f8cSSatish Balay if (mat->mapping) { 1207*a30f8f8cSSatish Balay ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr); 1208*a30f8f8cSSatish Balay } 1209*a30f8f8cSSatish Balay if (mat->bmapping) { 1210*a30f8f8cSSatish Balay ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr); 1211*a30f8f8cSSatish Balay } 1212*a30f8f8cSSatish Balay if (mat->rmap) { 1213*a30f8f8cSSatish Balay ierr = MapDestroy(mat->rmap);CHKERRQ(ierr); 1214*a30f8f8cSSatish Balay } 1215*a30f8f8cSSatish Balay if (mat->cmap) { 1216*a30f8f8cSSatish Balay ierr = MapDestroy(mat->cmap);CHKERRQ(ierr); 1217*a30f8f8cSSatish Balay } 1218*a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG) 1219*a30f8f8cSSatish Balay PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N); 1220*a30f8f8cSSatish Balay #endif 1221*a30f8f8cSSatish Balay 1222*a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1223*a30f8f8cSSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 1224*a30f8f8cSSatish Balay 1225*a30f8f8cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1226*a30f8f8cSSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1227*a30f8f8cSSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1228*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 1229*a30f8f8cSSatish Balay if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1230*a30f8f8cSSatish Balay #else 1231*a30f8f8cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1232*a30f8f8cSSatish Balay #endif 1233*a30f8f8cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1234*a30f8f8cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1235*a30f8f8cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1236*a30f8f8cSSatish Balay if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);} 1237*a30f8f8cSSatish Balay if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);} 1238*a30f8f8cSSatish Balay if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);} 1239*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE) 1240*a30f8f8cSSatish Balay if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);} 1241*a30f8f8cSSatish Balay #endif 1242*a30f8f8cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1243*a30f8f8cSSatish Balay PLogObjectDestroy(mat); 1244*a30f8f8cSSatish Balay PetscHeaderDestroy(mat); 1245*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1246*a30f8f8cSSatish Balay } 1247*a30f8f8cSSatish Balay 1248*a30f8f8cSSatish Balay #undef __FUNC__ 1249*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ" 1250*a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy) 1251*a30f8f8cSSatish Balay { 1252*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1253*a30f8f8cSSatish Balay int ierr,nt; 1254*a30f8f8cSSatish Balay 1255*a30f8f8cSSatish Balay PetscFunctionBegin; 1256*a30f8f8cSSatish Balay ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1257*a30f8f8cSSatish Balay if (nt != a->n) { 1258*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx"); 1259*a30f8f8cSSatish Balay } 1260*a30f8f8cSSatish Balay ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1261*a30f8f8cSSatish Balay if (nt != a->m) { 1262*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy"); 1263*a30f8f8cSSatish Balay } 1264*a30f8f8cSSatish Balay /* do subdiagonal part */ 1265*a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1266*a30f8f8cSSatish Balay /* send it on its way */ 1267*a30f8f8cSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1268*a30f8f8cSSatish Balay /* do local part */ 1269*a30f8f8cSSatish Balay ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr); 1270*a30f8f8cSSatish Balay ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr); 1271*a30f8f8cSSatish Balay /* receive remote parts */ 1272*a30f8f8cSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1273*a30f8f8cSSatish Balay 1274*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1275*a30f8f8cSSatish Balay } 1276*a30f8f8cSSatish Balay 1277*a30f8f8cSSatish Balay #undef __FUNC__ 1278*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ" 1279*a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1280*a30f8f8cSSatish Balay { 1281*a30f8f8cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1282*a30f8f8cSSatish Balay int ierr; 1283*a30f8f8cSSatish Balay 1284*a30f8f8cSSatish Balay PetscFunctionBegin; 1285*a30f8f8cSSatish Balay /* do subdiagonal part */ 1286*a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1287*a30f8f8cSSatish Balay /* send it on its way */ 1288*a30f8f8cSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1289*a30f8f8cSSatish Balay /* do local part */ 1290*a30f8f8cSSatish Balay ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr); 1291*a30f8f8cSSatish Balay ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr); 1292*a30f8f8cSSatish Balay /* receive remote parts */ 1293*a30f8f8cSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1294*a30f8f8cSSatish Balay 1295*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1296*a30f8f8cSSatish Balay } 1297*a30f8f8cSSatish Balay 1298*a30f8f8cSSatish Balay #undef __FUNC__ 1299*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ" 1300*a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy) 1301*a30f8f8cSSatish Balay { 1302*a30f8f8cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1303*a30f8f8cSSatish Balay int ierr; 1304*a30f8f8cSSatish Balay 1305*a30f8f8cSSatish Balay PetscFunctionBegin; 1306*a30f8f8cSSatish Balay /* do nondiagonal part */ 1307*a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1308*a30f8f8cSSatish Balay /* send it on its way */ 1309*a30f8f8cSSatish Balay ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1310*a30f8f8cSSatish Balay /* do local part */ 1311*a30f8f8cSSatish Balay ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1312*a30f8f8cSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1313*a30f8f8cSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1314*a30f8f8cSSatish Balay /* but is not perhaps always true. */ 1315*a30f8f8cSSatish Balay ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1316*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1317*a30f8f8cSSatish Balay } 1318*a30f8f8cSSatish Balay 1319*a30f8f8cSSatish Balay #undef __FUNC__ 1320*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ" 1321*a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1322*a30f8f8cSSatish Balay { 1323*a30f8f8cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1324*a30f8f8cSSatish Balay int ierr; 1325*a30f8f8cSSatish Balay 1326*a30f8f8cSSatish Balay PetscFunctionBegin; 1327*a30f8f8cSSatish Balay /* do nondiagonal part */ 1328*a30f8f8cSSatish Balay ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1329*a30f8f8cSSatish Balay /* send it on its way */ 1330*a30f8f8cSSatish Balay ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1331*a30f8f8cSSatish Balay /* do local part */ 1332*a30f8f8cSSatish Balay ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1333*a30f8f8cSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1334*a30f8f8cSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1335*a30f8f8cSSatish Balay /* but is not perhaps always true. */ 1336*a30f8f8cSSatish Balay ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr); 1337*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1338*a30f8f8cSSatish Balay } 1339*a30f8f8cSSatish Balay 1340*a30f8f8cSSatish Balay /* 1341*a30f8f8cSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1342*a30f8f8cSSatish Balay diagonal block 1343*a30f8f8cSSatish Balay */ 1344*a30f8f8cSSatish Balay #undef __FUNC__ 1345*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ" 1346*a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v) 1347*a30f8f8cSSatish Balay { 1348*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1349*a30f8f8cSSatish Balay int ierr; 1350*a30f8f8cSSatish Balay 1351*a30f8f8cSSatish Balay PetscFunctionBegin; 1352*a30f8f8cSSatish Balay /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */ 1353*a30f8f8cSSatish Balay ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1354*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1355*a30f8f8cSSatish Balay } 1356*a30f8f8cSSatish Balay 1357*a30f8f8cSSatish Balay #undef __FUNC__ 1358*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ" 1359*a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A) 1360*a30f8f8cSSatish Balay { 1361*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1362*a30f8f8cSSatish Balay int ierr; 1363*a30f8f8cSSatish Balay 1364*a30f8f8cSSatish Balay PetscFunctionBegin; 1365*a30f8f8cSSatish Balay ierr = MatScale(aa,a->A);CHKERRQ(ierr); 1366*a30f8f8cSSatish Balay ierr = MatScale(aa,a->B);CHKERRQ(ierr); 1367*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1368*a30f8f8cSSatish Balay } 1369*a30f8f8cSSatish Balay 1370*a30f8f8cSSatish Balay #undef __FUNC__ 1371*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ" 1372*a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n) 1373*a30f8f8cSSatish Balay { 1374*a30f8f8cSSatish Balay Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1375*a30f8f8cSSatish Balay 1376*a30f8f8cSSatish Balay PetscFunctionBegin; 1377*a30f8f8cSSatish Balay if (m) *m = mat->M; 1378*a30f8f8cSSatish Balay if (n) *n = mat->N; 1379*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1380*a30f8f8cSSatish Balay } 1381*a30f8f8cSSatish Balay 1382*a30f8f8cSSatish Balay #undef __FUNC__ 1383*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ" 1384*a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n) 1385*a30f8f8cSSatish Balay { 1386*a30f8f8cSSatish Balay Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1387*a30f8f8cSSatish Balay 1388*a30f8f8cSSatish Balay PetscFunctionBegin; 1389*a30f8f8cSSatish Balay *m = mat->m; *n = mat->n; 1390*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1391*a30f8f8cSSatish Balay } 1392*a30f8f8cSSatish Balay 1393*a30f8f8cSSatish Balay #undef __FUNC__ 1394*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ" 1395*a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n) 1396*a30f8f8cSSatish Balay { 1397*a30f8f8cSSatish Balay Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1398*a30f8f8cSSatish Balay 1399*a30f8f8cSSatish Balay PetscFunctionBegin; 1400*a30f8f8cSSatish Balay if (m) *m = mat->rstart*mat->bs; 1401*a30f8f8cSSatish Balay if (n) *n = mat->rend*mat->bs; 1402*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1403*a30f8f8cSSatish Balay } 1404*a30f8f8cSSatish Balay 1405*a30f8f8cSSatish Balay #undef __FUNC__ 1406*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ" 1407*a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v) 1408*a30f8f8cSSatish Balay { 1409*a30f8f8cSSatish Balay Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data; 1410*a30f8f8cSSatish Balay Scalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1411*a30f8f8cSSatish Balay int bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB; 1412*a30f8f8cSSatish Balay int nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs; 1413*a30f8f8cSSatish Balay int *cmap,*idx_p,cstart = mat->cstart; 1414*a30f8f8cSSatish Balay 1415*a30f8f8cSSatish Balay PetscFunctionBegin; 1416*a30f8f8cSSatish Balay if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active"); 1417*a30f8f8cSSatish Balay mat->getrowactive = PETSC_TRUE; 1418*a30f8f8cSSatish Balay 1419*a30f8f8cSSatish Balay if (!mat->rowvalues && (idx || v)) { 1420*a30f8f8cSSatish Balay /* 1421*a30f8f8cSSatish Balay allocate enough space to hold information from the longest row. 1422*a30f8f8cSSatish Balay */ 1423*a30f8f8cSSatish Balay Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data; 1424*a30f8f8cSSatish Balay Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ*)mat->B->data; 1425*a30f8f8cSSatish Balay int max = 1,mbs = mat->mbs,tmp; 1426*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 1427*a30f8f8cSSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */ 1428*a30f8f8cSSatish Balay if (max < tmp) { max = tmp; } 1429*a30f8f8cSSatish Balay } 1430*a30f8f8cSSatish Balay mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues); 1431*a30f8f8cSSatish Balay mat->rowindices = (int*)(mat->rowvalues + max*bs2); 1432*a30f8f8cSSatish Balay } 1433*a30f8f8cSSatish Balay 1434*a30f8f8cSSatish Balay if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows") 1435*a30f8f8cSSatish Balay lrow = row - brstart; /* local row index */ 1436*a30f8f8cSSatish Balay 1437*a30f8f8cSSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1438*a30f8f8cSSatish Balay if (!v) {pvA = 0; pvB = 0;} 1439*a30f8f8cSSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1440*a30f8f8cSSatish Balay ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1441*a30f8f8cSSatish Balay ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1442*a30f8f8cSSatish Balay nztot = nzA + nzB; 1443*a30f8f8cSSatish Balay 1444*a30f8f8cSSatish Balay cmap = mat->garray; 1445*a30f8f8cSSatish Balay if (v || idx) { 1446*a30f8f8cSSatish Balay if (nztot) { 1447*a30f8f8cSSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1448*a30f8f8cSSatish Balay int imark = -1; 1449*a30f8f8cSSatish Balay if (v) { 1450*a30f8f8cSSatish Balay *v = v_p = mat->rowvalues; 1451*a30f8f8cSSatish Balay for (i=0; i<nzB; i++) { 1452*a30f8f8cSSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1453*a30f8f8cSSatish Balay else break; 1454*a30f8f8cSSatish Balay } 1455*a30f8f8cSSatish Balay imark = i; 1456*a30f8f8cSSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1457*a30f8f8cSSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1458*a30f8f8cSSatish Balay } 1459*a30f8f8cSSatish Balay if (idx) { 1460*a30f8f8cSSatish Balay *idx = idx_p = mat->rowindices; 1461*a30f8f8cSSatish Balay if (imark > -1) { 1462*a30f8f8cSSatish Balay for (i=0; i<imark; i++) { 1463*a30f8f8cSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1464*a30f8f8cSSatish Balay } 1465*a30f8f8cSSatish Balay } else { 1466*a30f8f8cSSatish Balay for (i=0; i<nzB; i++) { 1467*a30f8f8cSSatish Balay if (cmap[cworkB[i]/bs] < cstart) 1468*a30f8f8cSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1469*a30f8f8cSSatish Balay else break; 1470*a30f8f8cSSatish Balay } 1471*a30f8f8cSSatish Balay imark = i; 1472*a30f8f8cSSatish Balay } 1473*a30f8f8cSSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1474*a30f8f8cSSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1475*a30f8f8cSSatish Balay } 1476*a30f8f8cSSatish Balay } else { 1477*a30f8f8cSSatish Balay if (idx) *idx = 0; 1478*a30f8f8cSSatish Balay if (v) *v = 0; 1479*a30f8f8cSSatish Balay } 1480*a30f8f8cSSatish Balay } 1481*a30f8f8cSSatish Balay *nz = nztot; 1482*a30f8f8cSSatish Balay ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1483*a30f8f8cSSatish Balay ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1484*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1485*a30f8f8cSSatish Balay } 1486*a30f8f8cSSatish Balay 1487*a30f8f8cSSatish Balay #undef __FUNC__ 1488*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ" 1489*a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v) 1490*a30f8f8cSSatish Balay { 1491*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1492*a30f8f8cSSatish Balay 1493*a30f8f8cSSatish Balay PetscFunctionBegin; 1494*a30f8f8cSSatish Balay if (baij->getrowactive == PETSC_FALSE) { 1495*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called"); 1496*a30f8f8cSSatish Balay } 1497*a30f8f8cSSatish Balay baij->getrowactive = PETSC_FALSE; 1498*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1499*a30f8f8cSSatish Balay } 1500*a30f8f8cSSatish Balay 1501*a30f8f8cSSatish Balay #undef __FUNC__ 1502*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ" 1503*a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs) 1504*a30f8f8cSSatish Balay { 1505*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1506*a30f8f8cSSatish Balay 1507*a30f8f8cSSatish Balay PetscFunctionBegin; 1508*a30f8f8cSSatish Balay *bs = baij->bs; 1509*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1510*a30f8f8cSSatish Balay } 1511*a30f8f8cSSatish Balay 1512*a30f8f8cSSatish Balay #undef __FUNC__ 1513*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ" 1514*a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A) 1515*a30f8f8cSSatish Balay { 1516*a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1517*a30f8f8cSSatish Balay int ierr; 1518*a30f8f8cSSatish Balay 1519*a30f8f8cSSatish Balay PetscFunctionBegin; 1520*a30f8f8cSSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 1521*a30f8f8cSSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 1522*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1523*a30f8f8cSSatish Balay } 1524*a30f8f8cSSatish Balay 1525*a30f8f8cSSatish Balay #undef __FUNC__ 1526*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ" 1527*a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1528*a30f8f8cSSatish Balay { 1529*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data; 1530*a30f8f8cSSatish Balay Mat A = a->A,B = a->B; 1531*a30f8f8cSSatish Balay int ierr; 1532*a30f8f8cSSatish Balay PetscReal isend[5],irecv[5]; 1533*a30f8f8cSSatish Balay 1534*a30f8f8cSSatish Balay PetscFunctionBegin; 1535*a30f8f8cSSatish Balay info->block_size = (double)a->bs; 1536*a30f8f8cSSatish Balay ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1537*a30f8f8cSSatish Balay isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1538*a30f8f8cSSatish Balay isend[3] = info->memory; isend[4] = info->mallocs; 1539*a30f8f8cSSatish Balay ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1540*a30f8f8cSSatish Balay isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1541*a30f8f8cSSatish Balay isend[3] += info->memory; isend[4] += info->mallocs; 1542*a30f8f8cSSatish Balay if (flag == MAT_LOCAL) { 1543*a30f8f8cSSatish Balay info->nz_used = isend[0]; 1544*a30f8f8cSSatish Balay info->nz_allocated = isend[1]; 1545*a30f8f8cSSatish Balay info->nz_unneeded = isend[2]; 1546*a30f8f8cSSatish Balay info->memory = isend[3]; 1547*a30f8f8cSSatish Balay info->mallocs = isend[4]; 1548*a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1549*a30f8f8cSSatish Balay ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr); 1550*a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1551*a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1552*a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1553*a30f8f8cSSatish Balay info->memory = irecv[3]; 1554*a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1555*a30f8f8cSSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1556*a30f8f8cSSatish Balay ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr); 1557*a30f8f8cSSatish Balay info->nz_used = irecv[0]; 1558*a30f8f8cSSatish Balay info->nz_allocated = irecv[1]; 1559*a30f8f8cSSatish Balay info->nz_unneeded = irecv[2]; 1560*a30f8f8cSSatish Balay info->memory = irecv[3]; 1561*a30f8f8cSSatish Balay info->mallocs = irecv[4]; 1562*a30f8f8cSSatish Balay } else { 1563*a30f8f8cSSatish Balay SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag); 1564*a30f8f8cSSatish Balay } 1565*a30f8f8cSSatish Balay info->rows_global = (double)a->M; 1566*a30f8f8cSSatish Balay info->columns_global = (double)a->N; 1567*a30f8f8cSSatish Balay info->rows_local = (double)a->m; 1568*a30f8f8cSSatish Balay info->columns_local = (double)a->N; 1569*a30f8f8cSSatish Balay info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1570*a30f8f8cSSatish Balay info->fill_ratio_needed = 0; 1571*a30f8f8cSSatish Balay info->factor_mallocs = 0; 1572*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1573*a30f8f8cSSatish Balay } 1574*a30f8f8cSSatish Balay 1575*a30f8f8cSSatish Balay #undef __FUNC__ 1576*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ" 1577*a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op) 1578*a30f8f8cSSatish Balay { 1579*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1580*a30f8f8cSSatish Balay int ierr; 1581*a30f8f8cSSatish Balay 1582*a30f8f8cSSatish Balay PetscFunctionBegin; 1583*a30f8f8cSSatish Balay if (op == MAT_NO_NEW_NONZERO_LOCATIONS || 1584*a30f8f8cSSatish Balay op == MAT_YES_NEW_NONZERO_LOCATIONS || 1585*a30f8f8cSSatish Balay op == MAT_COLUMNS_UNSORTED || 1586*a30f8f8cSSatish Balay op == MAT_COLUMNS_SORTED || 1587*a30f8f8cSSatish Balay op == MAT_NEW_NONZERO_ALLOCATION_ERR || 1588*a30f8f8cSSatish Balay op == MAT_KEEP_ZEROED_ROWS || 1589*a30f8f8cSSatish Balay op == MAT_NEW_NONZERO_LOCATION_ERR) { 1590*a30f8f8cSSatish Balay ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1591*a30f8f8cSSatish Balay ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1592*a30f8f8cSSatish Balay } else if (op == MAT_ROW_ORIENTED) { 1593*a30f8f8cSSatish Balay a->roworiented = PETSC_TRUE; 1594*a30f8f8cSSatish Balay ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1595*a30f8f8cSSatish Balay ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1596*a30f8f8cSSatish Balay } else if (op == MAT_ROWS_SORTED || 1597*a30f8f8cSSatish Balay op == MAT_ROWS_UNSORTED || 1598*a30f8f8cSSatish Balay op == MAT_SYMMETRIC || 1599*a30f8f8cSSatish Balay op == MAT_STRUCTURALLY_SYMMETRIC || 1600*a30f8f8cSSatish Balay op == MAT_YES_NEW_DIAGONALS || 1601*a30f8f8cSSatish Balay op == MAT_USE_HASH_TABLE) { 1602*a30f8f8cSSatish Balay PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"); 1603*a30f8f8cSSatish Balay } else if (op == MAT_COLUMN_ORIENTED) { 1604*a30f8f8cSSatish Balay a->roworiented = PETSC_FALSE; 1605*a30f8f8cSSatish Balay ierr = MatSetOption(a->A,op);CHKERRQ(ierr); 1606*a30f8f8cSSatish Balay ierr = MatSetOption(a->B,op);CHKERRQ(ierr); 1607*a30f8f8cSSatish Balay } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) { 1608*a30f8f8cSSatish Balay a->donotstash = PETSC_TRUE; 1609*a30f8f8cSSatish Balay } else if (op == MAT_NO_NEW_DIAGONALS) { 1610*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS"); 1611*a30f8f8cSSatish Balay } else if (op == MAT_USE_HASH_TABLE) { 1612*a30f8f8cSSatish Balay a->ht_flag = PETSC_TRUE; 1613*a30f8f8cSSatish Balay } else { 1614*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"unknown option"); 1615*a30f8f8cSSatish Balay } 1616*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1617*a30f8f8cSSatish Balay } 1618*a30f8f8cSSatish Balay 1619*a30f8f8cSSatish Balay #undef __FUNC__ 1620*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ(" 1621*a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout) 1622*a30f8f8cSSatish Balay { 1623*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 1624*a30f8f8cSSatish Balay Mat_SeqBAIJ *Aloc; 1625*a30f8f8cSSatish Balay Mat B; 1626*a30f8f8cSSatish Balay int ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col; 1627*a30f8f8cSSatish Balay int bs=baij->bs,mbs=baij->mbs; 1628*a30f8f8cSSatish Balay MatScalar *a; 1629*a30f8f8cSSatish Balay 1630*a30f8f8cSSatish Balay PetscFunctionBegin; 1631*a30f8f8cSSatish Balay if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place"); 1632*a30f8f8cSSatish Balay ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr); 1633*a30f8f8cSSatish Balay 1634*a30f8f8cSSatish Balay /* copy over the A part */ 1635*a30f8f8cSSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 1636*a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1637*a30f8f8cSSatish Balay rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals); 1638*a30f8f8cSSatish Balay 1639*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 1640*a30f8f8cSSatish Balay rvals[0] = bs*(baij->rstart + i); 1641*a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1642*a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1643*a30f8f8cSSatish Balay col = (baij->cstart+aj[j])*bs; 1644*a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 1645*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1646*a30f8f8cSSatish Balay col++; a += bs; 1647*a30f8f8cSSatish Balay } 1648*a30f8f8cSSatish Balay } 1649*a30f8f8cSSatish Balay } 1650*a30f8f8cSSatish Balay /* copy over the B part */ 1651*a30f8f8cSSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 1652*a30f8f8cSSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1653*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 1654*a30f8f8cSSatish Balay rvals[0] = bs*(baij->rstart + i); 1655*a30f8f8cSSatish Balay for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 1656*a30f8f8cSSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1657*a30f8f8cSSatish Balay col = baij->garray[aj[j]]*bs; 1658*a30f8f8cSSatish Balay for (k=0; k<bs; k++) { 1659*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 1660*a30f8f8cSSatish Balay col++; a += bs; 1661*a30f8f8cSSatish Balay } 1662*a30f8f8cSSatish Balay } 1663*a30f8f8cSSatish Balay } 1664*a30f8f8cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 1665*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667*a30f8f8cSSatish Balay 1668*a30f8f8cSSatish Balay if (matout) { 1669*a30f8f8cSSatish Balay *matout = B; 1670*a30f8f8cSSatish Balay } else { 1671*a30f8f8cSSatish Balay PetscOps *Abops; 1672*a30f8f8cSSatish Balay MatOps Aops; 1673*a30f8f8cSSatish Balay 1674*a30f8f8cSSatish Balay /* This isn't really an in-place transpose .... but free data structures from baij */ 1675*a30f8f8cSSatish Balay ierr = PetscFree(baij->rowners);CHKERRQ(ierr); 1676*a30f8f8cSSatish Balay ierr = MatDestroy(baij->A);CHKERRQ(ierr); 1677*a30f8f8cSSatish Balay ierr = MatDestroy(baij->B);CHKERRQ(ierr); 1678*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 1679*a30f8f8cSSatish Balay if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);} 1680*a30f8f8cSSatish Balay #else 1681*a30f8f8cSSatish Balay if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);} 1682*a30f8f8cSSatish Balay #endif 1683*a30f8f8cSSatish Balay if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);} 1684*a30f8f8cSSatish Balay if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);} 1685*a30f8f8cSSatish Balay if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);} 1686*a30f8f8cSSatish Balay ierr = PetscFree(baij);CHKERRQ(ierr); 1687*a30f8f8cSSatish Balay 1688*a30f8f8cSSatish Balay /* 1689*a30f8f8cSSatish Balay This is horrible, horrible code. We need to keep the 1690*a30f8f8cSSatish Balay A pointers for the bops and ops but copy everything 1691*a30f8f8cSSatish Balay else from C. 1692*a30f8f8cSSatish Balay */ 1693*a30f8f8cSSatish Balay Abops = A->bops; 1694*a30f8f8cSSatish Balay Aops = A->ops; 1695*a30f8f8cSSatish Balay ierr = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr); 1696*a30f8f8cSSatish Balay A->bops = Abops; 1697*a30f8f8cSSatish Balay A->ops = Aops; 1698*a30f8f8cSSatish Balay 1699*a30f8f8cSSatish Balay PetscHeaderDestroy(B); 1700*a30f8f8cSSatish Balay } 1701*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1702*a30f8f8cSSatish Balay } 1703*a30f8f8cSSatish Balay 1704*a30f8f8cSSatish Balay #undef __FUNC__ 1705*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ" 1706*a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr) 1707*a30f8f8cSSatish Balay { 1708*a30f8f8cSSatish Balay Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 1709*a30f8f8cSSatish Balay Mat a = baij->A,b = baij->B; 1710*a30f8f8cSSatish Balay int ierr,s1,s2,s3; 1711*a30f8f8cSSatish Balay 1712*a30f8f8cSSatish Balay PetscFunctionBegin; 1713*a30f8f8cSSatish Balay if (ll != rr) { 1714*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n"); 1715*a30f8f8cSSatish Balay } 1716*a30f8f8cSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1717*a30f8f8cSSatish Balay if (rr) { 1718*a30f8f8cSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1719*a30f8f8cSSatish Balay if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size"); 1720*a30f8f8cSSatish Balay /* Overlap communication with computation. */ 1721*a30f8f8cSSatish Balay ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1722*a30f8f8cSSatish Balay /*} if (ll) { */ 1723*a30f8f8cSSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1724*a30f8f8cSSatish Balay if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size"); 1725*a30f8f8cSSatish Balay ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr); 1726*a30f8f8cSSatish Balay /* } */ 1727*a30f8f8cSSatish Balay /* scale the diagonal block */ 1728*a30f8f8cSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 1729*a30f8f8cSSatish Balay 1730*a30f8f8cSSatish Balay /* if (rr) { */ 1731*a30f8f8cSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1732*a30f8f8cSSatish Balay ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 1733*a30f8f8cSSatish Balay ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr); 1734*a30f8f8cSSatish Balay } 1735*a30f8f8cSSatish Balay 1736*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1737*a30f8f8cSSatish Balay } 1738*a30f8f8cSSatish Balay 1739*a30f8f8cSSatish Balay #undef __FUNC__ 1740*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ" 1741*a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag) 1742*a30f8f8cSSatish Balay { 1743*a30f8f8cSSatish Balay Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data; 1744*a30f8f8cSSatish Balay int i,ierr,N,*rows,*owners = l->rowners,size = l->size; 1745*a30f8f8cSSatish Balay int *procs,*nprocs,j,found,idx,nsends,*work,row; 1746*a30f8f8cSSatish Balay int nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank; 1747*a30f8f8cSSatish Balay int *rvalues,tag = A->tag,count,base,slen,n,*source; 1748*a30f8f8cSSatish Balay int *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs; 1749*a30f8f8cSSatish Balay MPI_Comm comm = A->comm; 1750*a30f8f8cSSatish Balay MPI_Request *send_waits,*recv_waits; 1751*a30f8f8cSSatish Balay MPI_Status recv_status,*send_status; 1752*a30f8f8cSSatish Balay IS istmp; 1753*a30f8f8cSSatish Balay 1754*a30f8f8cSSatish Balay PetscFunctionBegin; 1755*a30f8f8cSSatish Balay ierr = ISGetSize(is,&N);CHKERRQ(ierr); 1756*a30f8f8cSSatish Balay ierr = ISGetIndices(is,&rows);CHKERRQ(ierr); 1757*a30f8f8cSSatish Balay 1758*a30f8f8cSSatish Balay /* first count number of contributors to each processor */ 1759*a30f8f8cSSatish Balay nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs); 1760*a30f8f8cSSatish Balay ierr = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr); 1761*a30f8f8cSSatish Balay procs = nprocs + size; 1762*a30f8f8cSSatish Balay owner = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/ 1763*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 1764*a30f8f8cSSatish Balay idx = rows[i]; 1765*a30f8f8cSSatish Balay found = 0; 1766*a30f8f8cSSatish Balay for (j=0; j<size; j++) { 1767*a30f8f8cSSatish Balay if (idx >= owners[j]*bs && idx < owners[j+1]*bs) { 1768*a30f8f8cSSatish Balay nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break; 1769*a30f8f8cSSatish Balay } 1770*a30f8f8cSSatish Balay } 1771*a30f8f8cSSatish Balay if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range"); 1772*a30f8f8cSSatish Balay } 1773*a30f8f8cSSatish Balay nsends = 0; for (i=0; i<size; i++) { nsends += procs[i];} 1774*a30f8f8cSSatish Balay 1775*a30f8f8cSSatish Balay /* inform other processors of number of messages and max length*/ 1776*a30f8f8cSSatish Balay work = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work); 1777*a30f8f8cSSatish Balay ierr = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr); 1778*a30f8f8cSSatish Balay nmax = work[rank]; 1779*a30f8f8cSSatish Balay nrecvs = work[size+rank]; 1780*a30f8f8cSSatish Balay ierr = PetscFree(work);CHKERRQ(ierr); 1781*a30f8f8cSSatish Balay 1782*a30f8f8cSSatish Balay /* post receives: */ 1783*a30f8f8cSSatish Balay rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues); 1784*a30f8f8cSSatish Balay recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits); 1785*a30f8f8cSSatish Balay for (i=0; i<nrecvs; i++) { 1786*a30f8f8cSSatish Balay ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr); 1787*a30f8f8cSSatish Balay } 1788*a30f8f8cSSatish Balay 1789*a30f8f8cSSatish Balay /* do sends: 1790*a30f8f8cSSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 1791*a30f8f8cSSatish Balay the ith processor 1792*a30f8f8cSSatish Balay */ 1793*a30f8f8cSSatish Balay svalues = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues); 1794*a30f8f8cSSatish Balay send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits); 1795*a30f8f8cSSatish Balay starts = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts); 1796*a30f8f8cSSatish Balay starts[0] = 0; 1797*a30f8f8cSSatish Balay for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1798*a30f8f8cSSatish Balay for (i=0; i<N; i++) { 1799*a30f8f8cSSatish Balay svalues[starts[owner[i]]++] = rows[i]; 1800*a30f8f8cSSatish Balay } 1801*a30f8f8cSSatish Balay ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr); 1802*a30f8f8cSSatish Balay 1803*a30f8f8cSSatish Balay starts[0] = 0; 1804*a30f8f8cSSatish Balay for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];} 1805*a30f8f8cSSatish Balay count = 0; 1806*a30f8f8cSSatish Balay for (i=0; i<size; i++) { 1807*a30f8f8cSSatish Balay if (procs[i]) { 1808*a30f8f8cSSatish Balay ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr); 1809*a30f8f8cSSatish Balay } 1810*a30f8f8cSSatish Balay } 1811*a30f8f8cSSatish Balay ierr = PetscFree(starts);CHKERRQ(ierr); 1812*a30f8f8cSSatish Balay 1813*a30f8f8cSSatish Balay base = owners[rank]*bs; 1814*a30f8f8cSSatish Balay 1815*a30f8f8cSSatish Balay /* wait on receives */ 1816*a30f8f8cSSatish Balay lens = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens); 1817*a30f8f8cSSatish Balay source = lens + nrecvs; 1818*a30f8f8cSSatish Balay count = nrecvs; slen = 0; 1819*a30f8f8cSSatish Balay while (count) { 1820*a30f8f8cSSatish Balay ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr); 1821*a30f8f8cSSatish Balay /* unpack receives into our local space */ 1822*a30f8f8cSSatish Balay ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr); 1823*a30f8f8cSSatish Balay source[imdex] = recv_status.MPI_SOURCE; 1824*a30f8f8cSSatish Balay lens[imdex] = n; 1825*a30f8f8cSSatish Balay slen += n; 1826*a30f8f8cSSatish Balay count--; 1827*a30f8f8cSSatish Balay } 1828*a30f8f8cSSatish Balay ierr = PetscFree(recv_waits);CHKERRQ(ierr); 1829*a30f8f8cSSatish Balay 1830*a30f8f8cSSatish Balay /* move the data into the send scatter */ 1831*a30f8f8cSSatish Balay lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows); 1832*a30f8f8cSSatish Balay count = 0; 1833*a30f8f8cSSatish Balay for (i=0; i<nrecvs; i++) { 1834*a30f8f8cSSatish Balay values = rvalues + i*nmax; 1835*a30f8f8cSSatish Balay for (j=0; j<lens[i]; j++) { 1836*a30f8f8cSSatish Balay lrows[count++] = values[j] - base; 1837*a30f8f8cSSatish Balay } 1838*a30f8f8cSSatish Balay } 1839*a30f8f8cSSatish Balay ierr = PetscFree(rvalues);CHKERRQ(ierr); 1840*a30f8f8cSSatish Balay ierr = PetscFree(lens);CHKERRQ(ierr); 1841*a30f8f8cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 1842*a30f8f8cSSatish Balay ierr = PetscFree(nprocs);CHKERRQ(ierr); 1843*a30f8f8cSSatish Balay 1844*a30f8f8cSSatish Balay /* actually zap the local rows */ 1845*a30f8f8cSSatish Balay ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr); 1846*a30f8f8cSSatish Balay PLogObjectParent(A,istmp); 1847*a30f8f8cSSatish Balay 1848*a30f8f8cSSatish Balay /* 1849*a30f8f8cSSatish Balay Zero the required rows. If the "diagonal block" of the matrix 1850*a30f8f8cSSatish Balay is square and the user wishes to set the diagonal we use seperate 1851*a30f8f8cSSatish Balay code so that MatSetValues() is not called for each diagonal allocating 1852*a30f8f8cSSatish Balay new memory, thus calling lots of mallocs and slowing things down. 1853*a30f8f8cSSatish Balay 1854*a30f8f8cSSatish Balay Contributed by: Mathew Knepley 1855*a30f8f8cSSatish Balay */ 1856*a30f8f8cSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1857*a30f8f8cSSatish Balay ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr); 1858*a30f8f8cSSatish Balay if (diag && (l->A->M == l->A->N)) { 1859*a30f8f8cSSatish Balay ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr); 1860*a30f8f8cSSatish Balay } else if (diag) { 1861*a30f8f8cSSatish Balay ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1862*a30f8f8cSSatish Balay if (((Mat_SeqSBAIJ*)l->A->data)->nonew) { 1863*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1864*a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1865*a30f8f8cSSatish Balay } 1866*a30f8f8cSSatish Balay for (i=0; i<slen; i++) { 1867*a30f8f8cSSatish Balay row = lrows[i] + rstart_bs; 1868*a30f8f8cSSatish Balay ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr); 1869*a30f8f8cSSatish Balay } 1870*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1871*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1872*a30f8f8cSSatish Balay } else { 1873*a30f8f8cSSatish Balay ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr); 1874*a30f8f8cSSatish Balay } 1875*a30f8f8cSSatish Balay 1876*a30f8f8cSSatish Balay ierr = ISDestroy(istmp);CHKERRQ(ierr); 1877*a30f8f8cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 1878*a30f8f8cSSatish Balay 1879*a30f8f8cSSatish Balay /* wait on sends */ 1880*a30f8f8cSSatish Balay if (nsends) { 1881*a30f8f8cSSatish Balay send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status); 1882*a30f8f8cSSatish Balay ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr); 1883*a30f8f8cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 1884*a30f8f8cSSatish Balay } 1885*a30f8f8cSSatish Balay ierr = PetscFree(send_waits);CHKERRQ(ierr); 1886*a30f8f8cSSatish Balay ierr = PetscFree(svalues);CHKERRQ(ierr); 1887*a30f8f8cSSatish Balay 1888*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1889*a30f8f8cSSatish Balay } 1890*a30f8f8cSSatish Balay 1891*a30f8f8cSSatish Balay #undef __FUNC__ 1892*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ" 1893*a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A) 1894*a30f8f8cSSatish Balay { 1895*a30f8f8cSSatish Balay Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data; 1896*a30f8f8cSSatish Balay MPI_Comm comm = A->comm; 1897*a30f8f8cSSatish Balay static int called = 0; 1898*a30f8f8cSSatish Balay int ierr; 1899*a30f8f8cSSatish Balay 1900*a30f8f8cSSatish Balay PetscFunctionBegin; 1901*a30f8f8cSSatish Balay if (!a->rank) { 1902*a30f8f8cSSatish Balay ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr); 1903*a30f8f8cSSatish Balay } 1904*a30f8f8cSSatish Balay if (called) {PetscFunctionReturn(0);} else called = 1; 1905*a30f8f8cSSatish Balay ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr); 1906*a30f8f8cSSatish Balay ierr = (*PetscHelpPrintf)(comm," -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr); 1907*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1908*a30f8f8cSSatish Balay } 1909*a30f8f8cSSatish Balay 1910*a30f8f8cSSatish Balay #undef __FUNC__ 1911*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ" 1912*a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A) 1913*a30f8f8cSSatish Balay { 1914*a30f8f8cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1915*a30f8f8cSSatish Balay int ierr; 1916*a30f8f8cSSatish Balay 1917*a30f8f8cSSatish Balay PetscFunctionBegin; 1918*a30f8f8cSSatish Balay ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 1919*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1920*a30f8f8cSSatish Balay } 1921*a30f8f8cSSatish Balay 1922*a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *); 1923*a30f8f8cSSatish Balay 1924*a30f8f8cSSatish Balay #undef __FUNC__ 1925*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ" 1926*a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag) 1927*a30f8f8cSSatish Balay { 1928*a30f8f8cSSatish Balay Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data; 1929*a30f8f8cSSatish Balay Mat a,b,c,d; 1930*a30f8f8cSSatish Balay PetscTruth flg; 1931*a30f8f8cSSatish Balay int ierr; 1932*a30f8f8cSSatish Balay 1933*a30f8f8cSSatish Balay PetscFunctionBegin; 1934*a30f8f8cSSatish Balay if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type"); 1935*a30f8f8cSSatish Balay a = matA->A; b = matA->B; 1936*a30f8f8cSSatish Balay c = matB->A; d = matB->B; 1937*a30f8f8cSSatish Balay 1938*a30f8f8cSSatish Balay ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1939*a30f8f8cSSatish Balay if (flg == PETSC_TRUE) { 1940*a30f8f8cSSatish Balay ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 1941*a30f8f8cSSatish Balay } 1942*a30f8f8cSSatish Balay ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr); 1943*a30f8f8cSSatish Balay PetscFunctionReturn(0); 1944*a30f8f8cSSatish Balay } 1945*a30f8f8cSSatish Balay 1946*a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/ 1947*a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = { 1948*a30f8f8cSSatish Balay MatSetValues_MPISBAIJ, 1949*a30f8f8cSSatish Balay MatGetRow_MPISBAIJ, 1950*a30f8f8cSSatish Balay MatRestoreRow_MPISBAIJ, 1951*a30f8f8cSSatish Balay MatMult_MPISBAIJ, 1952*a30f8f8cSSatish Balay MatMultAdd_MPISBAIJ, 1953*a30f8f8cSSatish Balay MatMultTranspose_MPISBAIJ, 1954*a30f8f8cSSatish Balay MatMultTransposeAdd_MPISBAIJ, 1955*a30f8f8cSSatish Balay 0, 1956*a30f8f8cSSatish Balay 0, 1957*a30f8f8cSSatish Balay 0, 1958*a30f8f8cSSatish Balay 0, 1959*a30f8f8cSSatish Balay 0, 1960*a30f8f8cSSatish Balay 0, 1961*a30f8f8cSSatish Balay 0, 1962*a30f8f8cSSatish Balay MatTranspose_MPISBAIJ, 1963*a30f8f8cSSatish Balay MatGetInfo_MPISBAIJ, 1964*a30f8f8cSSatish Balay MatEqual_MPISBAIJ, 1965*a30f8f8cSSatish Balay MatGetDiagonal_MPISBAIJ, 1966*a30f8f8cSSatish Balay MatDiagonalScale_MPISBAIJ, 1967*a30f8f8cSSatish Balay MatNorm_MPISBAIJ, 1968*a30f8f8cSSatish Balay MatAssemblyBegin_MPISBAIJ, 1969*a30f8f8cSSatish Balay MatAssemblyEnd_MPISBAIJ, 1970*a30f8f8cSSatish Balay 0, 1971*a30f8f8cSSatish Balay MatSetOption_MPISBAIJ, 1972*a30f8f8cSSatish Balay MatZeroEntries_MPISBAIJ, 1973*a30f8f8cSSatish Balay MatZeroRows_MPISBAIJ, 1974*a30f8f8cSSatish Balay 0, 1975*a30f8f8cSSatish Balay 0, 1976*a30f8f8cSSatish Balay 0, 1977*a30f8f8cSSatish Balay 0, 1978*a30f8f8cSSatish Balay MatGetSize_MPISBAIJ, 1979*a30f8f8cSSatish Balay MatGetLocalSize_MPISBAIJ, 1980*a30f8f8cSSatish Balay MatGetOwnershipRange_MPISBAIJ, 1981*a30f8f8cSSatish Balay 0, 1982*a30f8f8cSSatish Balay 0, 1983*a30f8f8cSSatish Balay 0, 1984*a30f8f8cSSatish Balay 0, 1985*a30f8f8cSSatish Balay MatDuplicate_MPISBAIJ, 1986*a30f8f8cSSatish Balay 0, 1987*a30f8f8cSSatish Balay 0, 1988*a30f8f8cSSatish Balay 0, 1989*a30f8f8cSSatish Balay 0, 1990*a30f8f8cSSatish Balay 0, 1991*a30f8f8cSSatish Balay MatGetSubMatrices_MPISBAIJ, 1992*a30f8f8cSSatish Balay MatIncreaseOverlap_MPISBAIJ, 1993*a30f8f8cSSatish Balay MatGetValues_MPISBAIJ, 1994*a30f8f8cSSatish Balay 0, 1995*a30f8f8cSSatish Balay MatPrintHelp_MPISBAIJ, 1996*a30f8f8cSSatish Balay MatScale_MPISBAIJ, 1997*a30f8f8cSSatish Balay 0, 1998*a30f8f8cSSatish Balay 0, 1999*a30f8f8cSSatish Balay 0, 2000*a30f8f8cSSatish Balay MatGetBlockSize_MPISBAIJ, 2001*a30f8f8cSSatish Balay 0, 2002*a30f8f8cSSatish Balay 0, 2003*a30f8f8cSSatish Balay 0, 2004*a30f8f8cSSatish Balay 0, 2005*a30f8f8cSSatish Balay 0, 2006*a30f8f8cSSatish Balay 0, 2007*a30f8f8cSSatish Balay MatSetUnfactored_MPISBAIJ, 2008*a30f8f8cSSatish Balay 0, 2009*a30f8f8cSSatish Balay MatSetValuesBlocked_MPISBAIJ, 2010*a30f8f8cSSatish Balay 0, 2011*a30f8f8cSSatish Balay 0, 2012*a30f8f8cSSatish Balay 0, 2013*a30f8f8cSSatish Balay MatGetMaps_Petsc}; 2014*a30f8f8cSSatish Balay 2015*a30f8f8cSSatish Balay 2016*a30f8f8cSSatish Balay EXTERN_C_BEGIN 2017*a30f8f8cSSatish Balay #undef __FUNC__ 2018*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ" 2019*a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a) 2020*a30f8f8cSSatish Balay { 2021*a30f8f8cSSatish Balay PetscFunctionBegin; 2022*a30f8f8cSSatish Balay *a = ((Mat_MPISBAIJ *)A->data)->A; 2023*a30f8f8cSSatish Balay *iscopy = PETSC_FALSE; 2024*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2025*a30f8f8cSSatish Balay } 2026*a30f8f8cSSatish Balay EXTERN_C_END 2027*a30f8f8cSSatish Balay 2028*a30f8f8cSSatish Balay #undef __FUNC__ 2029*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ" 2030*a30f8f8cSSatish Balay /*@C 2031*a30f8f8cSSatish Balay MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format 2032*a30f8f8cSSatish Balay (block compressed row). For good matrix assembly performance 2033*a30f8f8cSSatish Balay the user should preallocate the matrix storage by setting the parameters 2034*a30f8f8cSSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2035*a30f8f8cSSatish Balay performance can be increased by more than a factor of 50. 2036*a30f8f8cSSatish Balay 2037*a30f8f8cSSatish Balay Collective on MPI_Comm 2038*a30f8f8cSSatish Balay 2039*a30f8f8cSSatish Balay Input Parameters: 2040*a30f8f8cSSatish Balay + comm - MPI communicator 2041*a30f8f8cSSatish Balay . bs - size of blockk 2042*a30f8f8cSSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2043*a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2044*a30f8f8cSSatish Balay y vector for the matrix-vector product y = Ax. 2045*a30f8f8cSSatish Balay . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2046*a30f8f8cSSatish Balay This value should be the same as the local size used in creating the 2047*a30f8f8cSSatish Balay x vector for the matrix-vector product y = Ax. 2048*a30f8f8cSSatish Balay . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 2049*a30f8f8cSSatish Balay . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 2050*a30f8f8cSSatish Balay . d_nz - number of block nonzeros per block row in diagonal portion of local 2051*a30f8f8cSSatish Balay submatrix (same for all local rows) 2052*a30f8f8cSSatish Balay . d_nnz - array containing the number of block nonzeros in the various block rows 2053*a30f8f8cSSatish Balay of the in diagonal portion of the local (possibly different for each block 2054*a30f8f8cSSatish Balay row) or PETSC_NULL. You must leave room for the diagonal entry even if it is zero. 2055*a30f8f8cSSatish Balay . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2056*a30f8f8cSSatish Balay submatrix (same for all local rows). 2057*a30f8f8cSSatish Balay - o_nnz - array containing the number of nonzeros in the various block rows of the 2058*a30f8f8cSSatish Balay off-diagonal portion of the local submatrix (possibly different for 2059*a30f8f8cSSatish Balay each block row) or PETSC_NULL. 2060*a30f8f8cSSatish Balay 2061*a30f8f8cSSatish Balay Output Parameter: 2062*a30f8f8cSSatish Balay . A - the matrix 2063*a30f8f8cSSatish Balay 2064*a30f8f8cSSatish Balay Options Database Keys: 2065*a30f8f8cSSatish Balay . -mat_no_unroll - uses code that does not unroll the loops in the 2066*a30f8f8cSSatish Balay block calculations (much slower) 2067*a30f8f8cSSatish Balay . -mat_block_size - size of the blocks to use 2068*a30f8f8cSSatish Balay . -mat_mpi - use the parallel matrix data structures even on one processor 2069*a30f8f8cSSatish Balay (defaults to using SeqBAIJ format on one processor) 2070*a30f8f8cSSatish Balay 2071*a30f8f8cSSatish Balay Notes: 2072*a30f8f8cSSatish Balay The user MUST specify either the local or global matrix dimensions 2073*a30f8f8cSSatish Balay (possibly both). 2074*a30f8f8cSSatish Balay 2075*a30f8f8cSSatish Balay If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 2076*a30f8f8cSSatish Balay than it must be used on all processors that share the object for that argument. 2077*a30f8f8cSSatish Balay 2078*a30f8f8cSSatish Balay Storage Information: 2079*a30f8f8cSSatish Balay For a square global matrix we define each processor's diagonal portion 2080*a30f8f8cSSatish Balay to be its local rows and the corresponding columns (a square submatrix); 2081*a30f8f8cSSatish Balay each processor's off-diagonal portion encompasses the remainder of the 2082*a30f8f8cSSatish Balay local matrix (a rectangular submatrix). 2083*a30f8f8cSSatish Balay 2084*a30f8f8cSSatish Balay The user can specify preallocated storage for the diagonal part of 2085*a30f8f8cSSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 2086*a30f8f8cSSatish Balay d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic 2087*a30f8f8cSSatish Balay memory allocation. Likewise, specify preallocated storage for the 2088*a30f8f8cSSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 2089*a30f8f8cSSatish Balay 2090*a30f8f8cSSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 2091*a30f8f8cSSatish Balay the figure below we depict these three local rows and all columns (0-11). 2092*a30f8f8cSSatish Balay 2093*a30f8f8cSSatish Balay .vb 2094*a30f8f8cSSatish Balay 0 1 2 3 4 5 6 7 8 9 10 11 2095*a30f8f8cSSatish Balay ------------------- 2096*a30f8f8cSSatish Balay row 3 | o o o d d d o o o o o o 2097*a30f8f8cSSatish Balay row 4 | o o o d d d o o o o o o 2098*a30f8f8cSSatish Balay row 5 | o o o d d d o o o o o o 2099*a30f8f8cSSatish Balay ------------------- 2100*a30f8f8cSSatish Balay .ve 2101*a30f8f8cSSatish Balay 2102*a30f8f8cSSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 2103*a30f8f8cSSatish Balay submatrix, and any entries in the o locations are stored in the 2104*a30f8f8cSSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 2105*a30f8f8cSSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 2106*a30f8f8cSSatish Balay 2107*a30f8f8cSSatish Balay Now d_nz should indicate the number of block nonzeros per row in the d matrix, 2108*a30f8f8cSSatish Balay and o_nz should indicate the number of block nonzeros per row in the o matrix. 2109*a30f8f8cSSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 2110*a30f8f8cSSatish Balay one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2111*a30f8f8cSSatish Balay or you will get TERRIBLE performance; see the users' manual chapter on 2112*a30f8f8cSSatish Balay matrices. 2113*a30f8f8cSSatish Balay 2114*a30f8f8cSSatish Balay Level: intermediate 2115*a30f8f8cSSatish Balay 2116*a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel 2117*a30f8f8cSSatish Balay 2118*a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ() 2119*a30f8f8cSSatish Balay @*/ 2120*a30f8f8cSSatish Balay 2121*a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A) 2122*a30f8f8cSSatish Balay { 2123*a30f8f8cSSatish Balay Mat B; 2124*a30f8f8cSSatish Balay Mat_MPISBAIJ *b; 2125*a30f8f8cSSatish Balay int ierr,i,sum[1],work[1],mbs,nbs,Mbs=PETSC_DECIDE,size; 2126*a30f8f8cSSatish Balay PetscTruth flag1,flag2,flg; 2127*a30f8f8cSSatish Balay 2128*a30f8f8cSSatish Balay PetscFunctionBegin; 2129*a30f8f8cSSatish Balay if (M != N || m != n){ /* N and n are not used after this */ 2130*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n"); 2131*a30f8f8cSSatish Balay } 2132*a30f8f8cSSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2133*a30f8f8cSSatish Balay 2134*a30f8f8cSSatish Balay if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive"); 2135*a30f8f8cSSatish Balay if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz); 2136*a30f8f8cSSatish Balay if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz); 2137*a30f8f8cSSatish Balay if (d_nnz) { 2138*a30f8f8cSSatish Balay for (i=0; i<m/bs; i++) { 2139*a30f8f8cSSatish Balay if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]); 2140*a30f8f8cSSatish Balay } 2141*a30f8f8cSSatish Balay } 2142*a30f8f8cSSatish Balay if (o_nnz) { 2143*a30f8f8cSSatish Balay for (i=0; i<m/bs; i++) { 2144*a30f8f8cSSatish Balay if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]); 2145*a30f8f8cSSatish Balay } 2146*a30f8f8cSSatish Balay } 2147*a30f8f8cSSatish Balay 2148*a30f8f8cSSatish Balay ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2149*a30f8f8cSSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr); 2150*a30f8f8cSSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr); 2151*a30f8f8cSSatish Balay if (!flag1 && !flag2 && size == 1) { 2152*a30f8f8cSSatish Balay if (M == PETSC_DECIDE) M = m; 2153*a30f8f8cSSatish Balay ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr); 2154*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2155*a30f8f8cSSatish Balay } 2156*a30f8f8cSSatish Balay 2157*a30f8f8cSSatish Balay *A = 0; 2158*a30f8f8cSSatish Balay PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView); 2159*a30f8f8cSSatish Balay PLogObjectCreate(B); 2160*a30f8f8cSSatish Balay B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b); 2161*a30f8f8cSSatish Balay ierr = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr); 2162*a30f8f8cSSatish Balay ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2163*a30f8f8cSSatish Balay 2164*a30f8f8cSSatish Balay B->ops->destroy = MatDestroy_MPISBAIJ; 2165*a30f8f8cSSatish Balay B->ops->view = MatView_MPISBAIJ; 2166*a30f8f8cSSatish Balay B->mapping = 0; 2167*a30f8f8cSSatish Balay B->factor = 0; 2168*a30f8f8cSSatish Balay B->assembled = PETSC_FALSE; 2169*a30f8f8cSSatish Balay 2170*a30f8f8cSSatish Balay B->insertmode = NOT_SET_VALUES; 2171*a30f8f8cSSatish Balay ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr); 2172*a30f8f8cSSatish Balay ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr); 2173*a30f8f8cSSatish Balay 2174*a30f8f8cSSatish Balay if (m == PETSC_DECIDE && (d_nnz || o_nnz)) { 2175*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz"); 2176*a30f8f8cSSatish Balay } 2177*a30f8f8cSSatish Balay if (M == PETSC_DECIDE && m == PETSC_DECIDE) { 2178*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified"); 2179*a30f8f8cSSatish Balay } 2180*a30f8f8cSSatish Balay if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE; 2181*a30f8f8cSSatish Balay 2182*a30f8f8cSSatish Balay if (M == PETSC_DECIDE) { 2183*a30f8f8cSSatish Balay work[0] = m; mbs = m/bs; 2184*a30f8f8cSSatish Balay ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 2185*a30f8f8cSSatish Balay M = sum[0]; Mbs = M/bs; 2186*a30f8f8cSSatish Balay } else { /* M is specified */ 2187*a30f8f8cSSatish Balay Mbs = M/bs; 2188*a30f8f8cSSatish Balay if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize"); 2189*a30f8f8cSSatish Balay mbs = Mbs/b->size + ((Mbs % b->size) > b->rank); 2190*a30f8f8cSSatish Balay m = mbs*bs; 2191*a30f8f8cSSatish Balay } 2192*a30f8f8cSSatish Balay 2193*a30f8f8cSSatish Balay if (mbs*bs != m) { 2194*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize"); 2195*a30f8f8cSSatish Balay } 2196*a30f8f8cSSatish Balay 2197*a30f8f8cSSatish Balay b->m = m; B->m = m; 2198*a30f8f8cSSatish Balay b->n = m; B->n = m; 2199*a30f8f8cSSatish Balay b->N = M; B->N = M; 2200*a30f8f8cSSatish Balay b->M = M; 2201*a30f8f8cSSatish Balay B->M = M; 2202*a30f8f8cSSatish Balay b->bs = bs; 2203*a30f8f8cSSatish Balay b->bs2 = bs*bs; 2204*a30f8f8cSSatish Balay b->mbs = mbs; 2205*a30f8f8cSSatish Balay b->nbs = mbs; 2206*a30f8f8cSSatish Balay b->Mbs = Mbs; 2207*a30f8f8cSSatish Balay b->Nbs = Mbs; 2208*a30f8f8cSSatish Balay 2209*a30f8f8cSSatish Balay /* the information in the maps duplicates the information computed below, eventually 2210*a30f8f8cSSatish Balay we should remove the duplicate information that is not contained in the maps */ 2211*a30f8f8cSSatish Balay ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr); 2212*a30f8f8cSSatish Balay ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr); 2213*a30f8f8cSSatish Balay 2214*a30f8f8cSSatish Balay /* build local table of row and column ownerships */ 2215*a30f8f8cSSatish Balay b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners); 2216*a30f8f8cSSatish Balay PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2217*a30f8f8cSSatish Balay b->cowners = b->rowners + b->size + 2; 2218*a30f8f8cSSatish Balay b->rowners_bs = b->cowners + b->size + 2; 2219*a30f8f8cSSatish Balay ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2220*a30f8f8cSSatish Balay b->rowners[0] = 0; 2221*a30f8f8cSSatish Balay for (i=2; i<=b->size; i++) { 2222*a30f8f8cSSatish Balay b->rowners[i] += b->rowners[i-1]; 2223*a30f8f8cSSatish Balay } 2224*a30f8f8cSSatish Balay for (i=0; i<=b->size; i++) { 2225*a30f8f8cSSatish Balay b->rowners_bs[i] = b->rowners[i]*bs; 2226*a30f8f8cSSatish Balay } 2227*a30f8f8cSSatish Balay b->rstart = b->rowners[b->rank]; 2228*a30f8f8cSSatish Balay b->rend = b->rowners[b->rank+1]; 2229*a30f8f8cSSatish Balay b->rstart_bs = b->rstart * bs; 2230*a30f8f8cSSatish Balay b->rend_bs = b->rend * bs; 2231*a30f8f8cSSatish Balay 2232*a30f8f8cSSatish Balay b->cstart = b->rstart; 2233*a30f8f8cSSatish Balay b->cend = b->rend; 2234*a30f8f8cSSatish Balay b->cstart_bs = b->cstart * bs; 2235*a30f8f8cSSatish Balay b->cend_bs = b->cend * bs; 2236*a30f8f8cSSatish Balay 2237*a30f8f8cSSatish Balay 2238*a30f8f8cSSatish Balay if (d_nz == PETSC_DEFAULT) d_nz = 5; 2239*a30f8f8cSSatish Balay ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr); 2240*a30f8f8cSSatish Balay PLogObjectParent(B,b->A); 2241*a30f8f8cSSatish Balay if (o_nz == PETSC_DEFAULT) o_nz = 0; 2242*a30f8f8cSSatish Balay ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr); 2243*a30f8f8cSSatish Balay PLogObjectParent(B,b->B); 2244*a30f8f8cSSatish Balay 2245*a30f8f8cSSatish Balay /* build cache for off array entries formed */ 2246*a30f8f8cSSatish Balay ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr); 2247*a30f8f8cSSatish Balay ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr); 2248*a30f8f8cSSatish Balay b->donotstash = PETSC_FALSE; 2249*a30f8f8cSSatish Balay b->colmap = PETSC_NULL; 2250*a30f8f8cSSatish Balay b->garray = PETSC_NULL; 2251*a30f8f8cSSatish Balay b->roworiented = PETSC_TRUE; 2252*a30f8f8cSSatish Balay 2253*a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE) 2254*a30f8f8cSSatish Balay /* stuff for MatSetValues_XXX in single precision */ 2255*a30f8f8cSSatish Balay b->lensetvalues = 0; 2256*a30f8f8cSSatish Balay b->setvaluescopy = PETSC_NULL; 2257*a30f8f8cSSatish Balay #endif 2258*a30f8f8cSSatish Balay 2259*a30f8f8cSSatish Balay /* stuff used in block assembly */ 2260*a30f8f8cSSatish Balay b->barray = 0; 2261*a30f8f8cSSatish Balay 2262*a30f8f8cSSatish Balay /* stuff used for matrix vector multiply */ 2263*a30f8f8cSSatish Balay b->lvec = 0; 2264*a30f8f8cSSatish Balay b->Mvctx = 0; 2265*a30f8f8cSSatish Balay 2266*a30f8f8cSSatish Balay /* stuff for MatGetRow() */ 2267*a30f8f8cSSatish Balay b->rowindices = 0; 2268*a30f8f8cSSatish Balay b->rowvalues = 0; 2269*a30f8f8cSSatish Balay b->getrowactive = PETSC_FALSE; 2270*a30f8f8cSSatish Balay 2271*a30f8f8cSSatish Balay /* hash table stuff */ 2272*a30f8f8cSSatish Balay b->ht = 0; 2273*a30f8f8cSSatish Balay b->hd = 0; 2274*a30f8f8cSSatish Balay b->ht_size = 0; 2275*a30f8f8cSSatish Balay b->ht_flag = PETSC_FALSE; 2276*a30f8f8cSSatish Balay b->ht_fact = 0; 2277*a30f8f8cSSatish Balay b->ht_total_ct = 0; 2278*a30f8f8cSSatish Balay b->ht_insert_ct = 0; 2279*a30f8f8cSSatish Balay 2280*a30f8f8cSSatish Balay *A = B; 2281*a30f8f8cSSatish Balay ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr); 2282*a30f8f8cSSatish Balay if (flg) { 2283*a30f8f8cSSatish Balay double fact = 1.39; 2284*a30f8f8cSSatish Balay ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr); 2285*a30f8f8cSSatish Balay ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr); 2286*a30f8f8cSSatish Balay if (fact <= 1.0) fact = 1.39; 2287*a30f8f8cSSatish Balay ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 2288*a30f8f8cSSatish Balay PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact); 2289*a30f8f8cSSatish Balay } 2290*a30f8f8cSSatish Balay ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C", 2291*a30f8f8cSSatish Balay "MatStoreValues_MPISBAIJ", 2292*a30f8f8cSSatish Balay MatStoreValues_MPISBAIJ);CHKERRQ(ierr); 2293*a30f8f8cSSatish Balay ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C", 2294*a30f8f8cSSatish Balay "MatRetrieveValues_MPISBAIJ", 2295*a30f8f8cSSatish Balay MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr); 2296*a30f8f8cSSatish Balay ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C", 2297*a30f8f8cSSatish Balay "MatGetDiagonalBlock_MPISBAIJ", 2298*a30f8f8cSSatish Balay MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr); 2299*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2300*a30f8f8cSSatish Balay } 2301*a30f8f8cSSatish Balay 2302*a30f8f8cSSatish Balay 2303*a30f8f8cSSatish Balay #undef __FUNC__ 2304*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ" 2305*a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2306*a30f8f8cSSatish Balay { 2307*a30f8f8cSSatish Balay Mat mat; 2308*a30f8f8cSSatish Balay Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data; 2309*a30f8f8cSSatish Balay int ierr,len=0; 2310*a30f8f8cSSatish Balay PetscTruth flg; 2311*a30f8f8cSSatish Balay 2312*a30f8f8cSSatish Balay PetscFunctionBegin; 2313*a30f8f8cSSatish Balay *newmat = 0; 2314*a30f8f8cSSatish Balay PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView); 2315*a30f8f8cSSatish Balay PLogObjectCreate(mat); 2316*a30f8f8cSSatish Balay mat->data = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a); 2317*a30f8f8cSSatish Balay ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2318*a30f8f8cSSatish Balay mat->ops->destroy = MatDestroy_MPISBAIJ; 2319*a30f8f8cSSatish Balay mat->ops->view = MatView_MPISBAIJ; 2320*a30f8f8cSSatish Balay mat->factor = matin->factor; 2321*a30f8f8cSSatish Balay mat->assembled = PETSC_TRUE; 2322*a30f8f8cSSatish Balay mat->insertmode = NOT_SET_VALUES; 2323*a30f8f8cSSatish Balay 2324*a30f8f8cSSatish Balay a->m = mat->m = oldmat->m; 2325*a30f8f8cSSatish Balay a->n = mat->n = oldmat->n; 2326*a30f8f8cSSatish Balay a->M = mat->M = oldmat->M; 2327*a30f8f8cSSatish Balay a->N = mat->N = oldmat->N; 2328*a30f8f8cSSatish Balay 2329*a30f8f8cSSatish Balay a->bs = oldmat->bs; 2330*a30f8f8cSSatish Balay a->bs2 = oldmat->bs2; 2331*a30f8f8cSSatish Balay a->mbs = oldmat->mbs; 2332*a30f8f8cSSatish Balay a->nbs = oldmat->nbs; 2333*a30f8f8cSSatish Balay a->Mbs = oldmat->Mbs; 2334*a30f8f8cSSatish Balay a->Nbs = oldmat->Nbs; 2335*a30f8f8cSSatish Balay 2336*a30f8f8cSSatish Balay a->rstart = oldmat->rstart; 2337*a30f8f8cSSatish Balay a->rend = oldmat->rend; 2338*a30f8f8cSSatish Balay a->cstart = oldmat->cstart; 2339*a30f8f8cSSatish Balay a->cend = oldmat->cend; 2340*a30f8f8cSSatish Balay a->size = oldmat->size; 2341*a30f8f8cSSatish Balay a->rank = oldmat->rank; 2342*a30f8f8cSSatish Balay a->donotstash = oldmat->donotstash; 2343*a30f8f8cSSatish Balay a->roworiented = oldmat->roworiented; 2344*a30f8f8cSSatish Balay a->rowindices = 0; 2345*a30f8f8cSSatish Balay a->rowvalues = 0; 2346*a30f8f8cSSatish Balay a->getrowactive = PETSC_FALSE; 2347*a30f8f8cSSatish Balay a->barray = 0; 2348*a30f8f8cSSatish Balay a->rstart_bs = oldmat->rstart_bs; 2349*a30f8f8cSSatish Balay a->rend_bs = oldmat->rend_bs; 2350*a30f8f8cSSatish Balay a->cstart_bs = oldmat->cstart_bs; 2351*a30f8f8cSSatish Balay a->cend_bs = oldmat->cend_bs; 2352*a30f8f8cSSatish Balay 2353*a30f8f8cSSatish Balay /* hash table stuff */ 2354*a30f8f8cSSatish Balay a->ht = 0; 2355*a30f8f8cSSatish Balay a->hd = 0; 2356*a30f8f8cSSatish Balay a->ht_size = 0; 2357*a30f8f8cSSatish Balay a->ht_flag = oldmat->ht_flag; 2358*a30f8f8cSSatish Balay a->ht_fact = oldmat->ht_fact; 2359*a30f8f8cSSatish Balay a->ht_total_ct = 0; 2360*a30f8f8cSSatish Balay a->ht_insert_ct = 0; 2361*a30f8f8cSSatish Balay 2362*a30f8f8cSSatish Balay 2363*a30f8f8cSSatish Balay a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners); 2364*a30f8f8cSSatish Balay PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ)); 2365*a30f8f8cSSatish Balay a->cowners = a->rowners + a->size + 2; 2366*a30f8f8cSSatish Balay a->rowners_bs = a->cowners + a->size + 2; 2367*a30f8f8cSSatish Balay ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr); 2368*a30f8f8cSSatish Balay ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr); 2369*a30f8f8cSSatish Balay ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr); 2370*a30f8f8cSSatish Balay if (oldmat->colmap) { 2371*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE) 2372*a30f8f8cSSatish Balay ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2373*a30f8f8cSSatish Balay #else 2374*a30f8f8cSSatish Balay a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap); 2375*a30f8f8cSSatish Balay PLogObjectMemory(mat,(a->Nbs)*sizeof(int)); 2376*a30f8f8cSSatish Balay ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr); 2377*a30f8f8cSSatish Balay #endif 2378*a30f8f8cSSatish Balay } else a->colmap = 0; 2379*a30f8f8cSSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 2380*a30f8f8cSSatish Balay a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray); 2381*a30f8f8cSSatish Balay PLogObjectMemory(mat,len*sizeof(int)); 2382*a30f8f8cSSatish Balay ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr); 2383*a30f8f8cSSatish Balay } else a->garray = 0; 2384*a30f8f8cSSatish Balay 2385*a30f8f8cSSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2386*a30f8f8cSSatish Balay PLogObjectParent(mat,a->lvec); 2387*a30f8f8cSSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2388*a30f8f8cSSatish Balay 2389*a30f8f8cSSatish Balay PLogObjectParent(mat,a->Mvctx); 2390*a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2391*a30f8f8cSSatish Balay PLogObjectParent(mat,a->A); 2392*a30f8f8cSSatish Balay ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2393*a30f8f8cSSatish Balay PLogObjectParent(mat,a->B); 2394*a30f8f8cSSatish Balay ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr); 2395*a30f8f8cSSatish Balay ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr); 2396*a30f8f8cSSatish Balay if (flg) { 2397*a30f8f8cSSatish Balay ierr = MatPrintHelp(mat);CHKERRQ(ierr); 2398*a30f8f8cSSatish Balay } 2399*a30f8f8cSSatish Balay *newmat = mat; 2400*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2401*a30f8f8cSSatish Balay } 2402*a30f8f8cSSatish Balay 2403*a30f8f8cSSatish Balay #include "petscsys.h" 2404*a30f8f8cSSatish Balay 2405*a30f8f8cSSatish Balay #undef __FUNC__ 2406*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ" 2407*a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat) 2408*a30f8f8cSSatish Balay { 2409*a30f8f8cSSatish Balay Mat A; 2410*a30f8f8cSSatish Balay int i,nz,ierr,j,rstart,rend,fd; 2411*a30f8f8cSSatish Balay Scalar *vals,*buf; 2412*a30f8f8cSSatish Balay MPI_Comm comm = ((PetscObject)viewer)->comm; 2413*a30f8f8cSSatish Balay MPI_Status status; 2414*a30f8f8cSSatish Balay int header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols; 2415*a30f8f8cSSatish Balay int *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf; 2416*a30f8f8cSSatish Balay int tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows; 2417*a30f8f8cSSatish Balay int *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount; 2418*a30f8f8cSSatish Balay int dcount,kmax,k,nzcount,tmp; 2419*a30f8f8cSSatish Balay 2420*a30f8f8cSSatish Balay PetscFunctionBegin; 2421*a30f8f8cSSatish Balay ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr); 2422*a30f8f8cSSatish Balay 2423*a30f8f8cSSatish Balay ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2424*a30f8f8cSSatish Balay ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2425*a30f8f8cSSatish Balay if (!rank) { 2426*a30f8f8cSSatish Balay ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2427*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr); 2428*a30f8f8cSSatish Balay if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object"); 2429*a30f8f8cSSatish Balay if (header[3] < 0) { 2430*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ"); 2431*a30f8f8cSSatish Balay } 2432*a30f8f8cSSatish Balay } 2433*a30f8f8cSSatish Balay 2434*a30f8f8cSSatish Balay ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr); 2435*a30f8f8cSSatish Balay M = header[1]; N = header[2]; 2436*a30f8f8cSSatish Balay 2437*a30f8f8cSSatish Balay if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices"); 2438*a30f8f8cSSatish Balay 2439*a30f8f8cSSatish Balay /* 2440*a30f8f8cSSatish Balay This code adds extra rows to make sure the number of rows is 2441*a30f8f8cSSatish Balay divisible by the blocksize 2442*a30f8f8cSSatish Balay */ 2443*a30f8f8cSSatish Balay Mbs = M/bs; 2444*a30f8f8cSSatish Balay extra_rows = bs - M + bs*(Mbs); 2445*a30f8f8cSSatish Balay if (extra_rows == bs) extra_rows = 0; 2446*a30f8f8cSSatish Balay else Mbs++; 2447*a30f8f8cSSatish Balay if (extra_rows &&!rank) { 2448*a30f8f8cSSatish Balay PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"); 2449*a30f8f8cSSatish Balay } 2450*a30f8f8cSSatish Balay 2451*a30f8f8cSSatish Balay /* determine ownership of all rows */ 2452*a30f8f8cSSatish Balay mbs = Mbs/size + ((Mbs % size) > rank); 2453*a30f8f8cSSatish Balay m = mbs * bs; 2454*a30f8f8cSSatish Balay rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners); 2455*a30f8f8cSSatish Balay browners = rowners + size + 1; 2456*a30f8f8cSSatish Balay ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr); 2457*a30f8f8cSSatish Balay rowners[0] = 0; 2458*a30f8f8cSSatish Balay for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 2459*a30f8f8cSSatish Balay for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 2460*a30f8f8cSSatish Balay rstart = rowners[rank]; 2461*a30f8f8cSSatish Balay rend = rowners[rank+1]; 2462*a30f8f8cSSatish Balay 2463*a30f8f8cSSatish Balay /* distribute row lengths to all processors */ 2464*a30f8f8cSSatish Balay locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens); 2465*a30f8f8cSSatish Balay if (!rank) { 2466*a30f8f8cSSatish Balay rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths); 2467*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 2468*a30f8f8cSSatish Balay for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 2469*a30f8f8cSSatish Balay sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts); 2470*a30f8f8cSSatish Balay for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i]; 2471*a30f8f8cSSatish Balay ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2472*a30f8f8cSSatish Balay ierr = PetscFree(sndcounts);CHKERRQ(ierr); 2473*a30f8f8cSSatish Balay } else { 2474*a30f8f8cSSatish Balay ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr); 2475*a30f8f8cSSatish Balay } 2476*a30f8f8cSSatish Balay 2477*a30f8f8cSSatish Balay if (!rank) { 2478*a30f8f8cSSatish Balay /* calculate the number of nonzeros on each processor */ 2479*a30f8f8cSSatish Balay procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz); 2480*a30f8f8cSSatish Balay ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr); 2481*a30f8f8cSSatish Balay for (i=0; i<size; i++) { 2482*a30f8f8cSSatish Balay for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) { 2483*a30f8f8cSSatish Balay procsnz[i] += rowlengths[j]; 2484*a30f8f8cSSatish Balay } 2485*a30f8f8cSSatish Balay } 2486*a30f8f8cSSatish Balay ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2487*a30f8f8cSSatish Balay 2488*a30f8f8cSSatish Balay /* determine max buffer needed and allocate it */ 2489*a30f8f8cSSatish Balay maxnz = 0; 2490*a30f8f8cSSatish Balay for (i=0; i<size; i++) { 2491*a30f8f8cSSatish Balay maxnz = PetscMax(maxnz,procsnz[i]); 2492*a30f8f8cSSatish Balay } 2493*a30f8f8cSSatish Balay cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols); 2494*a30f8f8cSSatish Balay 2495*a30f8f8cSSatish Balay /* read in my part of the matrix column indices */ 2496*a30f8f8cSSatish Balay nz = procsnz[0]; 2497*a30f8f8cSSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2498*a30f8f8cSSatish Balay mycols = ibuf; 2499*a30f8f8cSSatish Balay if (size == 1) nz -= extra_rows; 2500*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2501*a30f8f8cSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; } 2502*a30f8f8cSSatish Balay 2503*a30f8f8cSSatish Balay /* read in every ones (except the last) and ship off */ 2504*a30f8f8cSSatish Balay for (i=1; i<size-1; i++) { 2505*a30f8f8cSSatish Balay nz = procsnz[i]; 2506*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2507*a30f8f8cSSatish Balay ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr); 2508*a30f8f8cSSatish Balay } 2509*a30f8f8cSSatish Balay /* read in the stuff for the last proc */ 2510*a30f8f8cSSatish Balay if (size != 1) { 2511*a30f8f8cSSatish Balay nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 2512*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2513*a30f8f8cSSatish Balay for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 2514*a30f8f8cSSatish Balay ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr); 2515*a30f8f8cSSatish Balay } 2516*a30f8f8cSSatish Balay ierr = PetscFree(cols);CHKERRQ(ierr); 2517*a30f8f8cSSatish Balay } else { 2518*a30f8f8cSSatish Balay /* determine buffer space needed for message */ 2519*a30f8f8cSSatish Balay nz = 0; 2520*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 2521*a30f8f8cSSatish Balay nz += locrowlens[i]; 2522*a30f8f8cSSatish Balay } 2523*a30f8f8cSSatish Balay ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf); 2524*a30f8f8cSSatish Balay mycols = ibuf; 2525*a30f8f8cSSatish Balay /* receive message of column indices*/ 2526*a30f8f8cSSatish Balay ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr); 2527*a30f8f8cSSatish Balay ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr); 2528*a30f8f8cSSatish Balay if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2529*a30f8f8cSSatish Balay } 2530*a30f8f8cSSatish Balay 2531*a30f8f8cSSatish Balay /* loop over local rows, determining number of off diagonal entries */ 2532*a30f8f8cSSatish Balay dlens = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens); 2533*a30f8f8cSSatish Balay odlens = dlens + (rend-rstart); 2534*a30f8f8cSSatish Balay mask = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask); 2535*a30f8f8cSSatish Balay ierr = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr); 2536*a30f8f8cSSatish Balay masked1 = mask + Mbs; 2537*a30f8f8cSSatish Balay masked2 = masked1 + Mbs; 2538*a30f8f8cSSatish Balay rowcount = 0; nzcount = 0; 2539*a30f8f8cSSatish Balay for (i=0; i<mbs; i++) { 2540*a30f8f8cSSatish Balay dcount = 0; 2541*a30f8f8cSSatish Balay odcount = 0; 2542*a30f8f8cSSatish Balay for (j=0; j<bs; j++) { 2543*a30f8f8cSSatish Balay kmax = locrowlens[rowcount]; 2544*a30f8f8cSSatish Balay for (k=0; k<kmax; k++) { 2545*a30f8f8cSSatish Balay tmp = mycols[nzcount++]/bs; 2546*a30f8f8cSSatish Balay if (!mask[tmp]) { 2547*a30f8f8cSSatish Balay mask[tmp] = 1; 2548*a30f8f8cSSatish Balay if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 2549*a30f8f8cSSatish Balay else masked1[dcount++] = tmp; 2550*a30f8f8cSSatish Balay } 2551*a30f8f8cSSatish Balay } 2552*a30f8f8cSSatish Balay rowcount++; 2553*a30f8f8cSSatish Balay } 2554*a30f8f8cSSatish Balay 2555*a30f8f8cSSatish Balay dlens[i] = dcount; 2556*a30f8f8cSSatish Balay odlens[i] = odcount; 2557*a30f8f8cSSatish Balay 2558*a30f8f8cSSatish Balay /* zero out the mask elements we set */ 2559*a30f8f8cSSatish Balay for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 2560*a30f8f8cSSatish Balay for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 2561*a30f8f8cSSatish Balay } 2562*a30f8f8cSSatish Balay 2563*a30f8f8cSSatish Balay /* create our matrix */ 2564*a30f8f8cSSatish Balay ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr); 2565*a30f8f8cSSatish Balay A = *newmat; 2566*a30f8f8cSSatish Balay MatSetOption(A,MAT_COLUMNS_SORTED); 2567*a30f8f8cSSatish Balay 2568*a30f8f8cSSatish Balay if (!rank) { 2569*a30f8f8cSSatish Balay buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf); 2570*a30f8f8cSSatish Balay /* read in my part of the matrix numerical values */ 2571*a30f8f8cSSatish Balay nz = procsnz[0]; 2572*a30f8f8cSSatish Balay vals = buf; 2573*a30f8f8cSSatish Balay mycols = ibuf; 2574*a30f8f8cSSatish Balay if (size == 1) nz -= extra_rows; 2575*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2576*a30f8f8cSSatish Balay if (size == 1) for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; } 2577*a30f8f8cSSatish Balay 2578*a30f8f8cSSatish Balay /* insert into matrix */ 2579*a30f8f8cSSatish Balay jj = rstart*bs; 2580*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 2581*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2582*a30f8f8cSSatish Balay mycols += locrowlens[i]; 2583*a30f8f8cSSatish Balay vals += locrowlens[i]; 2584*a30f8f8cSSatish Balay jj++; 2585*a30f8f8cSSatish Balay } 2586*a30f8f8cSSatish Balay /* read in other processors (except the last one) and ship out */ 2587*a30f8f8cSSatish Balay for (i=1; i<size-1; i++) { 2588*a30f8f8cSSatish Balay nz = procsnz[i]; 2589*a30f8f8cSSatish Balay vals = buf; 2590*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2591*a30f8f8cSSatish Balay ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr); 2592*a30f8f8cSSatish Balay } 2593*a30f8f8cSSatish Balay /* the last proc */ 2594*a30f8f8cSSatish Balay if (size != 1){ 2595*a30f8f8cSSatish Balay nz = procsnz[i] - extra_rows; 2596*a30f8f8cSSatish Balay vals = buf; 2597*a30f8f8cSSatish Balay ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2598*a30f8f8cSSatish Balay for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 2599*a30f8f8cSSatish Balay ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr); 2600*a30f8f8cSSatish Balay } 2601*a30f8f8cSSatish Balay ierr = PetscFree(procsnz);CHKERRQ(ierr); 2602*a30f8f8cSSatish Balay } else { 2603*a30f8f8cSSatish Balay /* receive numeric values */ 2604*a30f8f8cSSatish Balay buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf); 2605*a30f8f8cSSatish Balay 2606*a30f8f8cSSatish Balay /* receive message of values*/ 2607*a30f8f8cSSatish Balay vals = buf; 2608*a30f8f8cSSatish Balay mycols = ibuf; 2609*a30f8f8cSSatish Balay ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr); 2610*a30f8f8cSSatish Balay ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr); 2611*a30f8f8cSSatish Balay if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file"); 2612*a30f8f8cSSatish Balay 2613*a30f8f8cSSatish Balay /* insert into matrix */ 2614*a30f8f8cSSatish Balay jj = rstart*bs; 2615*a30f8f8cSSatish Balay for (i=0; i<m; i++) { 2616*a30f8f8cSSatish Balay ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 2617*a30f8f8cSSatish Balay mycols += locrowlens[i]; 2618*a30f8f8cSSatish Balay vals += locrowlens[i]; 2619*a30f8f8cSSatish Balay jj++; 2620*a30f8f8cSSatish Balay } 2621*a30f8f8cSSatish Balay } 2622*a30f8f8cSSatish Balay ierr = PetscFree(locrowlens);CHKERRQ(ierr); 2623*a30f8f8cSSatish Balay ierr = PetscFree(buf);CHKERRQ(ierr); 2624*a30f8f8cSSatish Balay ierr = PetscFree(ibuf);CHKERRQ(ierr); 2625*a30f8f8cSSatish Balay ierr = PetscFree(rowners);CHKERRQ(ierr); 2626*a30f8f8cSSatish Balay ierr = PetscFree(dlens);CHKERRQ(ierr); 2627*a30f8f8cSSatish Balay ierr = PetscFree(mask);CHKERRQ(ierr); 2628*a30f8f8cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2629*a30f8f8cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2630*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2631*a30f8f8cSSatish Balay } 2632*a30f8f8cSSatish Balay 2633*a30f8f8cSSatish Balay #undef __FUNC__ 2634*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor" 2635*a30f8f8cSSatish Balay /*@ 2636*a30f8f8cSSatish Balay MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 2637*a30f8f8cSSatish Balay 2638*a30f8f8cSSatish Balay Input Parameters: 2639*a30f8f8cSSatish Balay . mat - the matrix 2640*a30f8f8cSSatish Balay . fact - factor 2641*a30f8f8cSSatish Balay 2642*a30f8f8cSSatish Balay Collective on Mat 2643*a30f8f8cSSatish Balay 2644*a30f8f8cSSatish Balay Level: advanced 2645*a30f8f8cSSatish Balay 2646*a30f8f8cSSatish Balay Notes: 2647*a30f8f8cSSatish Balay This can also be set by the command line option: -mat_use_hash_table fact 2648*a30f8f8cSSatish Balay 2649*a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT 2650*a30f8f8cSSatish Balay 2651*a30f8f8cSSatish Balay .seealso: MatSetOption() 2652*a30f8f8cSSatish Balay @*/ 2653*a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact) 2654*a30f8f8cSSatish Balay { 2655*a30f8f8cSSatish Balay Mat_MPIBAIJ *baij; 2656*a30f8f8cSSatish Balay 2657*a30f8f8cSSatish Balay PetscFunctionBegin; 2658*a30f8f8cSSatish Balay PetscValidHeaderSpecific(mat,MAT_COOKIE); 2659*a30f8f8cSSatish Balay if (mat->type != MATMPIBAIJ) { 2660*a30f8f8cSSatish Balay SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only."); 2661*a30f8f8cSSatish Balay } 2662*a30f8f8cSSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 2663*a30f8f8cSSatish Balay baij->ht_fact = fact; 2664*a30f8f8cSSatish Balay PetscFunctionReturn(0); 2665*a30f8f8cSSatish Balay } 2666