1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c5d9258eSSatish Balay 3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 565a92638SMatthew G. Knepley #include <petscsf.h> 679bdfe76SSatish Balay 77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 97ea3e4caSstefano_zampini #endif 107ea3e4caSstefano_zampini 11985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 127843d17aSBarry Smith { 137843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14dfbe8321SBarry Smith PetscErrorCode ierr; 154e879edeSHong Zhang PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 164e879edeSHong Zhang PetscScalar *va,*vv; 174e879edeSHong Zhang Vec vB,vA; 184e879edeSHong Zhang const PetscScalar *vb; 197843d17aSBarry Smith 207843d17aSBarry Smith PetscFunctionBegin; 214e879edeSHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 224e879edeSHong Zhang ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 234e879edeSHong Zhang 244e879edeSHong Zhang ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 25985db425SBarry Smith if (idx) { 264e879edeSHong Zhang for (i=0; i<m; i++) { 2726fbe8dcSKarl Rupp if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2826fbe8dcSKarl Rupp } 29985db425SBarry Smith } 307843d17aSBarry Smith 314e879edeSHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 3243359b5eSHong Zhang ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 334e879edeSHong Zhang ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 347843d17aSBarry Smith 354e879edeSHong Zhang ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 364e879edeSHong Zhang ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 374e879edeSHong Zhang for (i=0; i<m; i++) { 3826fbe8dcSKarl Rupp if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 394e879edeSHong Zhang vv[i] = 
vb[i]; 404e879edeSHong Zhang if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 414e879edeSHong Zhang } else { 424e879edeSHong Zhang vv[i] = va[i]; 4343359b5eSHong Zhang if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 444e879edeSHong Zhang idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 4526fbe8dcSKarl Rupp } 467843d17aSBarry Smith } 474e879edeSHong Zhang ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 484e879edeSHong Zhang ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 494e879edeSHong Zhang ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 50c31cb41cSBarry Smith ierr = PetscFree(idxb);CHKERRQ(ierr); 514e879edeSHong Zhang ierr = VecDestroy(&vA);CHKERRQ(ierr); 524e879edeSHong Zhang ierr = VecDestroy(&vB);CHKERRQ(ierr); 537843d17aSBarry Smith PetscFunctionReturn(0); 547843d17aSBarry Smith } 557843d17aSBarry Smith 567087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 577fc3c18eSBarry Smith { 587fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 59dfbe8321SBarry Smith PetscErrorCode ierr; 607fc3c18eSBarry Smith 617fc3c18eSBarry Smith PetscFunctionBegin; 627fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 637fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 647fc3c18eSBarry Smith PetscFunctionReturn(0); 657fc3c18eSBarry Smith } 667fc3c18eSBarry Smith 677087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 687fc3c18eSBarry Smith { 697fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70dfbe8321SBarry Smith PetscErrorCode ierr; 717fc3c18eSBarry Smith 727fc3c18eSBarry Smith PetscFunctionBegin; 737fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 747fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 757fc3c18eSBarry Smith PetscFunctionReturn(0); 767fc3c18eSBarry Smith } 777fc3c18eSBarry Smith 78537820f0SBarry Smith /* 79537820f0SBarry Smith Local 
utility routine that creates a mapping from the global column 8057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 81e06f6af7SJed Brown storage of the matrix. This is done in a non scalable way since the 8257b952d6SSatish Balay length of colmap equals the global matrix length. 8357b952d6SSatish Balay */ 84ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 8557b952d6SSatish Balay { 8657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8757b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 886849ba73SBarry Smith PetscErrorCode ierr; 89d0f46423SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 9057b952d6SSatish Balay 91d64ed03dSBarry Smith PetscFunctionBegin; 92aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 93e23dfa41SBarry Smith ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 9448e59246SSatish Balay for (i=0; i<nbs; i++) { 953861aac3SJed Brown ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 9648e59246SSatish Balay } 9748e59246SSatish Balay #else 98580bdb30SBarry Smith ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 993bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 100928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 10148e59246SSatish Balay #endif 1023a40ed3dSBarry Smith PetscFunctionReturn(0); 10357b952d6SSatish Balay } 10457b952d6SSatish Balay 105d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 10680c1aa95SSatish Balay { \ 10780c1aa95SSatish Balay brow = row/bs; \ 10880c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 11080c1aa95SSatish Balay bcol = col/bs; \ 11180c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 112ab26458aSBarry Smith low = 0; high = nrow; \ 
113ab26458aSBarry Smith while (high-low > 3) { \ 114ab26458aSBarry Smith t = (low+high)/2; \ 115ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 116ab26458aSBarry Smith else low = t; \ 117ab26458aSBarry Smith } \ 118ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11980c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 12080c1aa95SSatish Balay if (rp[_i] == bcol) { \ 12180c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 122eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 123eada6651SSatish Balay else *bap = value; \ 124ac7a638eSSatish Balay goto a_noinsert; \ 12580c1aa95SSatish Balay } \ 12680c1aa95SSatish Balay } \ 12789280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 128d40312a9SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 129fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 13080c1aa95SSatish Balay N = nrow++ - 1; \ 13180c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 132580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 133580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 134580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 13580c1aa95SSatish Balay rp[_i] = bcol; \ 13680c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 137ac7a638eSSatish Balay a_noinsert:; \ 13880c1aa95SSatish Balay ailen[brow] = nrow; \ 13980c1aa95SSatish Balay } 14057b952d6SSatish Balay 141d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 142ac7a638eSSatish Balay { \ 143ac7a638eSSatish Balay brow = row/bs; \ 144ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 145ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 146ac7a638eSSatish Balay bcol = col/bs; \ 
147ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 148ac7a638eSSatish Balay low = 0; high = nrow; \ 149ac7a638eSSatish Balay while (high-low > 3) { \ 150ac7a638eSSatish Balay t = (low+high)/2; \ 151ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 152ac7a638eSSatish Balay else low = t; \ 153ac7a638eSSatish Balay } \ 154ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 155ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 156ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 157ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 158ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 159ac7a638eSSatish Balay else *bap = value; \ 160ac7a638eSSatish Balay goto b_noinsert; \ 161ac7a638eSSatish Balay } \ 162ac7a638eSSatish Balay } \ 16389280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 164d40312a9SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 165fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 166ac7a638eSSatish Balay N = nrow++ - 1; \ 167ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 168580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 169580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 170580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 171ac7a638eSSatish Balay rp[_i] = bcol; \ 172ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 173ac7a638eSSatish Balay b_noinsert:; \ 174ac7a638eSSatish Balay bilen[brow] = nrow; \ 175ac7a638eSSatish Balay } 176ac7a638eSSatish Balay 177b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 17857b952d6SSatish Balay { 17957b952d6SSatish Balay Mat_MPIBAIJ 
*baij = (Mat_MPIBAIJ*)mat->data; 18093fea6afSBarry Smith MatScalar value; 181ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 182dfbe8321SBarry Smith PetscErrorCode ierr; 183b24ad042SBarry Smith PetscInt i,j,row,col; 184d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 185d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 186d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 18757b952d6SSatish Balay 188eada6651SSatish Balay /* Some Variables required in the macro */ 18980c1aa95SSatish Balay Mat A = baij->A; 19080c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 191b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 1923eda8832SBarry Smith MatScalar *aa =a->a; 193ac7a638eSSatish Balay 194ac7a638eSSatish Balay Mat B = baij->B; 195ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 196b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 1973eda8832SBarry Smith MatScalar *ba =b->a; 198ac7a638eSSatish Balay 199b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 200b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 2013eda8832SBarry Smith MatScalar *ap,*bap; 20280c1aa95SSatish Balay 203d64ed03dSBarry Smith PetscFunctionBegin; 20457b952d6SSatish Balay for (i=0; i<m; i++) { 2055ef9f2a5SBarry Smith if (im[i] < 0) continue; 206c1758adbSBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 20757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 20857b952d6SSatish Balay row = im[i] - rstart_orig; 20957b952d6SSatish Balay for (j=0; j<n; j++) { 21057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { 21157b952d6SSatish Balay col = in[j] - cstart_orig; 212db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 213db4deed7SKarl Rupp else value = v[i+j*m]; 214d40312a9SBarry 
Smith MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 21573959e64SBarry Smith } else if (in[j] < 0) continue; 216c1758adbSBarry Smith else if (PetscUnlikely(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 2179245e749SBarry Smith else { 21857b952d6SSatish Balay if (mat->was_assembled) { 219905e6a2fSBarry Smith if (!baij->colmap) { 220ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 221905e6a2fSBarry Smith } 222aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2230f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 224bba1ac68SSatish Balay col = col - 1; 22548e59246SSatish Balay #else 226bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 22748e59246SSatish Balay #endif 228c9ef50b2SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 229ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 2308295de27SSatish Balay col = in[j]; 2319bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2329bf004c3SSatish Balay B = baij->B; 2339bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 2349bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2359bf004c3SSatish Balay ba =b->a; 236c9ef50b2SBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 237c9ef50b2SBarry Smith else col += in[j]%bs; 2388295de27SSatish Balay } else col = in[j]; 239db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 240db4deed7SKarl Rupp else value = v[i+j*m]; 241d40312a9SBarry Smith MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 24290da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 24357b952d6SSatish Balay } 24457b952d6SSatish Balay } 245d64ed03dSBarry Smith } else { 2464cb17eb5SBarry 
Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 24790f02eecSBarry Smith if (!baij->donotstash) { 2485080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 249ff2fd236SBarry Smith if (roworiented) { 250b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 251ff2fd236SBarry Smith } else { 252b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 25557b952d6SSatish Balay } 25690f02eecSBarry Smith } 2573a40ed3dSBarry Smith PetscFunctionReturn(0); 25857b952d6SSatish Balay } 25957b952d6SSatish Balay 2608ab52850SBarry Smith PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 261880c6e6aSBarry Smith { 262880c6e6aSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2638ab52850SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 264880c6e6aSBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 265880c6e6aSBarry Smith PetscErrorCode ierr; 2668ab52850SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 267880c6e6aSBarry Smith PetscBool roworiented=a->roworiented; 268880c6e6aSBarry Smith const PetscScalar *value = v; 269880c6e6aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 270880c6e6aSBarry Smith 271880c6e6aSBarry Smith PetscFunctionBegin; 272880c6e6aSBarry Smith rp = aj + ai[row]; 273880c6e6aSBarry Smith ap = aa + bs2*ai[row]; 274880c6e6aSBarry Smith rmax = imax[row]; 275880c6e6aSBarry Smith nrow = ailen[row]; 2768ab52850SBarry Smith value = v; 2778ab52850SBarry Smith low = 0; 2788ab52850SBarry Smith high = nrow; 279880c6e6aSBarry Smith while (high-low > 7) { 280880c6e6aSBarry Smith t = (low+high)/2; 281880c6e6aSBarry Smith if (rp[t] 
> col) high = t; 282880c6e6aSBarry Smith else low = t; 283880c6e6aSBarry Smith } 284880c6e6aSBarry Smith for (i=low; i<high; i++) { 285880c6e6aSBarry Smith if (rp[i] > col) break; 286880c6e6aSBarry Smith if (rp[i] == col) { 287880c6e6aSBarry Smith bap = ap + bs2*i; 288880c6e6aSBarry Smith if (roworiented) { 289880c6e6aSBarry Smith if (is == ADD_VALUES) { 2908ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 291880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 292880c6e6aSBarry Smith bap[jj] += *value++; 293880c6e6aSBarry Smith } 294880c6e6aSBarry Smith } 295880c6e6aSBarry Smith } else { 2968ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 297880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 298880c6e6aSBarry Smith bap[jj] = *value++; 299880c6e6aSBarry Smith } 300880c6e6aSBarry Smith } 301880c6e6aSBarry Smith } 302880c6e6aSBarry Smith } else { 303880c6e6aSBarry Smith if (is == ADD_VALUES) { 3048ab52850SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 305880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 306880c6e6aSBarry Smith bap[jj] += value[jj]; 307880c6e6aSBarry Smith } 308880c6e6aSBarry Smith bap += bs; 309880c6e6aSBarry Smith } 310880c6e6aSBarry Smith } else { 3118ab52850SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 312880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 313880c6e6aSBarry Smith bap[jj] = value[jj]; 314880c6e6aSBarry Smith } 315880c6e6aSBarry Smith bap += bs; 316880c6e6aSBarry Smith } 317880c6e6aSBarry Smith } 318880c6e6aSBarry Smith } 319880c6e6aSBarry Smith goto noinsert2; 320880c6e6aSBarry Smith } 321880c6e6aSBarry Smith } 322880c6e6aSBarry Smith if (nonew == 1) goto noinsert2; 323880c6e6aSBarry Smith if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol); 324880c6e6aSBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 325880c6e6aSBarry Smith N = nrow++ - 1; high++; 326880c6e6aSBarry Smith /* shift up 
all the later entries in this row */ 327580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 328580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 329880c6e6aSBarry Smith rp[i] = col; 330880c6e6aSBarry Smith bap = ap + bs2*i; 331880c6e6aSBarry Smith if (roworiented) { 3328ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 333880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 334880c6e6aSBarry Smith bap[jj] = *value++; 335880c6e6aSBarry Smith } 336880c6e6aSBarry Smith } 337880c6e6aSBarry Smith } else { 3388ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 339880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 340880c6e6aSBarry Smith *bap++ = *value++; 341880c6e6aSBarry Smith } 342880c6e6aSBarry Smith } 343880c6e6aSBarry Smith } 344880c6e6aSBarry Smith noinsert2:; 345880c6e6aSBarry Smith ailen[row] = nrow; 346880c6e6aSBarry Smith PetscFunctionReturn(0); 347880c6e6aSBarry Smith } 348880c6e6aSBarry Smith 3498ab52850SBarry Smith /* 3508ab52850SBarry Smith This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 3518ab52850SBarry Smith by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 3528ab52850SBarry Smith */ 35397e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 354ab26458aSBarry Smith { 355ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 356dd6ea824SBarry Smith const PetscScalar *value; 357f15d580aSBarry Smith MatScalar *barray = baij->barray; 358ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 359dfbe8321SBarry Smith PetscErrorCode ierr; 360899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 361899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 362d0f46423SBarry Smith PetscInt 
cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 363ab26458aSBarry Smith 364b16ae2b1SBarry Smith PetscFunctionBegin; 36530793edcSSatish Balay if (!barray) { 366785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 36782502324SSatish Balay baij->barray = barray; 36830793edcSSatish Balay } 36930793edcSSatish Balay 37026fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 37126fbe8dcSKarl Rupp else stepval = (m-1)*bs; 37226fbe8dcSKarl Rupp 373ab26458aSBarry Smith for (i=0; i<m; i++) { 3745ef9f2a5SBarry Smith if (im[i] < 0) continue; 375cf9c20a2SJed Brown if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1); 376ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 377ab26458aSBarry Smith row = im[i] - rstart; 378ab26458aSBarry Smith for (j=0; j<n; j++) { 37915b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 38015b57d14SSatish Balay if ((roworiented) && (n == 1)) { 381f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 38215b57d14SSatish Balay } else if ((!roworiented) && (m == 1)) { 383f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 38415b57d14SSatish Balay } else { /* Here a copy is required */ 385ab26458aSBarry Smith if (roworiented) { 38653ef36baSBarry Smith value = v + (i*(stepval+bs) + j)*bs; 387ab26458aSBarry Smith } else { 38853ef36baSBarry Smith value = v + (j*(stepval+bs) + i)*bs; 389abef11f7SSatish Balay } 39053ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 39126fbe8dcSKarl Rupp for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 39253ef36baSBarry Smith barray += bs; 39347513183SBarry Smith } 39430793edcSSatish Balay barray -= bs2; 39515b57d14SSatish Balay } 396abef11f7SSatish Balay 397abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend) { 398abef11f7SSatish Balay col = in[j] - cstart; 3998ab52850SBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 40026fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 4019245e749SBarry Smith else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1); 4029245e749SBarry Smith else { 403ab26458aSBarry Smith if (mat->was_assembled) { 404ab26458aSBarry Smith if (!baij->colmap) { 405ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 406ab26458aSBarry Smith } 407a5eb4965SSatish Balay 4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 409aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 410b24ad042SBarry Smith { PetscInt data; 4110f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 412e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413fa46199cSSatish Balay } 41448e59246SSatish Balay #else 415e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 416a5eb4965SSatish Balay #endif 41748e59246SSatish Balay #endif 418aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 4190f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 420fa46199cSSatish Balay col = (col - 1)/bs; 42148e59246SSatish Balay #else 422a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 42348e59246SSatish Balay #endif 4240e9bae81SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 425ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 426ab26458aSBarry Smith col = in[j]; 427bb003d0fSBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]); 428db4deed7SKarl Rupp } else col = in[j]; 4298ab52850SBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 430ab26458aSBarry Smith } 431ab26458aSBarry Smith } 432d64ed03dSBarry Smith } else { 433bb003d0fSBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 434ab26458aSBarry Smith if (!baij->donotstash) { 435ff2fd236SBarry Smith if (roworiented) { 4366fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 437ff2fd236SBarry Smith } else { 4386fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 439ff2fd236SBarry Smith } 440abef11f7SSatish Balay } 441ab26458aSBarry Smith } 442ab26458aSBarry Smith } 4433a40ed3dSBarry Smith PetscFunctionReturn(0); 444ab26458aSBarry Smith } 4456fa18ffdSBarry Smith 4460bdbc534SSatish Balay #define HASH_KEY 0.6180339887 447b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 448b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 45097e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 4510bdbc534SSatish Balay { 4520bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 453ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 454dfbe8321SBarry Smith PetscErrorCode ierr; 455b24ad042SBarry Smith PetscInt i,j,row,col; 456d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 457d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 458d0f46423SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 459329f5518SBarry Smith 
PetscReal tmp; 4603eda8832SBarry Smith MatScalar **HD = baij->hd,value; 461b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4620bdbc534SSatish Balay 4630bdbc534SSatish Balay PetscFunctionBegin; 4640bdbc534SSatish Balay for (i=0; i<m; i++) { 46576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 466e32f2f54SBarry Smith if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 467e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 46876bd3646SJed Brown } 4690bdbc534SSatish Balay row = im[i]; 470c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4710bdbc534SSatish Balay for (j=0; j<n; j++) { 4720bdbc534SSatish Balay col = in[j]; 473db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 474db4deed7SKarl Rupp else value = v[i+j*m]; 475b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 476c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 477c2760754SSatish Balay h1 = HASH(size,key,tmp); 4780bdbc534SSatish Balay 479c2760754SSatish Balay 480c2760754SSatish Balay idx = h1; 48176bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 482187ce0cbSSatish Balay insert_ct++; 483187ce0cbSSatish Balay total_ct++; 484187ce0cbSSatish Balay if (HT[idx] != key) { 485187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 486187ce0cbSSatish Balay if (idx == size) { 487187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 488f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 489187ce0cbSSatish Balay } 490187ce0cbSSatish Balay } 49176bd3646SJed Brown } else if (HT[idx] != key) { 492c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 493c2760754SSatish Balay if (idx == size) { 494c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 
495f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 496c2760754SSatish Balay } 497c2760754SSatish Balay } 498c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 499c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 500c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5010bdbc534SSatish Balay } 50226fbe8dcSKarl Rupp } else if (!baij->donotstash) { 503ff2fd236SBarry Smith if (roworiented) { 504b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 505ff2fd236SBarry Smith } else { 506b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 5070bdbc534SSatish Balay } 5080bdbc534SSatish Balay } 5090bdbc534SSatish Balay } 51076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 511abf3b562SBarry Smith baij->ht_total_ct += total_ct; 512abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 51376bd3646SJed Brown } 5140bdbc534SSatish Balay PetscFunctionReturn(0); 5150bdbc534SSatish Balay } 5160bdbc534SSatish Balay 51797e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 5180bdbc534SSatish Balay { 5190bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 520ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 521dfbe8321SBarry Smith PetscErrorCode ierr; 522b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 523899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 524d0f46423SBarry Smith PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 525b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 526329f5518SBarry Smith PetscReal tmp; 5273eda8832SBarry Smith MatScalar 
**HD = baij->hd,*baij_a; 528dd6ea824SBarry Smith const PetscScalar *v_t,*value; 529b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5300bdbc534SSatish Balay 531d0a41580SSatish Balay PetscFunctionBegin; 53226fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 53326fbe8dcSKarl Rupp else stepval = (m-1)*bs; 53426fbe8dcSKarl Rupp 5350bdbc534SSatish Balay for (i=0; i<m; i++) { 53676bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 537e32f2f54SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 538e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 53976bd3646SJed Brown } 5400bdbc534SSatish Balay row = im[i]; 541ab715e2cSSatish Balay v_t = v + i*nbs2; 542c2760754SSatish Balay if (row >= rstart && row < rend) { 5430bdbc534SSatish Balay for (j=0; j<n; j++) { 5440bdbc534SSatish Balay col = in[j]; 5450bdbc534SSatish Balay 5460bdbc534SSatish Balay /* Look up into the Hash Table */ 547c2760754SSatish Balay key = row*Nbs+col+1; 548c2760754SSatish Balay h1 = HASH(size,key,tmp); 5490bdbc534SSatish Balay 550c2760754SSatish Balay idx = h1; 55176bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 552187ce0cbSSatish Balay total_ct++; 553187ce0cbSSatish Balay insert_ct++; 554187ce0cbSSatish Balay if (HT[idx] != key) { 555187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 556187ce0cbSSatish Balay if (idx == size) { 557187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 558f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 559187ce0cbSSatish Balay } 560187ce0cbSSatish Balay } 56176bd3646SJed Brown } else if (HT[idx] != key) { 562c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 563c2760754SSatish Balay if (idx == size) { 
564c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 565f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 566c2760754SSatish Balay } 567c2760754SSatish Balay } 568c2760754SSatish Balay baij_a = HD[idx]; 5690bdbc534SSatish Balay if (roworiented) { 570c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 571187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 572187ce0cbSSatish Balay value = v_t; 573187ce0cbSSatish Balay v_t += bs; 574fef45726SSatish Balay if (addv == ADD_VALUES) { 575c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 576c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 577fef45726SSatish Balay baij_a[jj] += *value++; 578b4cc0f5aSSatish Balay } 579b4cc0f5aSSatish Balay } 580fef45726SSatish Balay } else { 581c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 582c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 583fef45726SSatish Balay baij_a[jj] = *value++; 584fef45726SSatish Balay } 585fef45726SSatish Balay } 586fef45726SSatish Balay } 5870bdbc534SSatish Balay } else { 5880bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 589fef45726SSatish Balay if (addv == ADD_VALUES) { 590b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 5910bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 592fef45726SSatish Balay baij_a[jj] += *value++; 593fef45726SSatish Balay } 594fef45726SSatish Balay } 595fef45726SSatish Balay } else { 596fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 597fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 598fef45726SSatish Balay baij_a[jj] = *value++; 599fef45726SSatish Balay } 600b4cc0f5aSSatish Balay } 6010bdbc534SSatish Balay } 6020bdbc534SSatish Balay } 6030bdbc534SSatish Balay } 6040bdbc534SSatish Balay } else { 6050bdbc534SSatish Balay if (!baij->donotstash) { 6060bdbc534SSatish Balay if (roworiented) { 6078798bf22SSatish Balay 
ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6080bdbc534SSatish Balay } else { 6098798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6100bdbc534SSatish Balay } 6110bdbc534SSatish Balay } 6120bdbc534SSatish Balay } 6130bdbc534SSatish Balay } 61476bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 615abf3b562SBarry Smith baij->ht_total_ct += total_ct; 616abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 61776bd3646SJed Brown } 6180bdbc534SSatish Balay PetscFunctionReturn(0); 6190bdbc534SSatish Balay } 620133cdb44SSatish Balay 621b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 622d6de1c52SSatish Balay { 623d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6246849ba73SBarry Smith PetscErrorCode ierr; 625d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 626d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 627d6de1c52SSatish Balay 628133cdb44SSatish Balay PetscFunctionBegin; 629d6de1c52SSatish Balay for (i=0; i<m; i++) { 630e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 631e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 632d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 633d6de1c52SSatish Balay row = idxm[i] - bsrstart; 634d6de1c52SSatish Balay for (j=0; j<n; j++) { 635e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 636e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max 
%D",idxn[j],mat->cmap->N-1); 637d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 638d6de1c52SSatish Balay col = idxn[j] - bscstart; 63998dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 640d64ed03dSBarry Smith } else { 641905e6a2fSBarry Smith if (!baij->colmap) { 642ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 643905e6a2fSBarry Smith } 644aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 6450f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 646fa46199cSSatish Balay data--; 64748e59246SSatish Balay #else 64848e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 64948e59246SSatish Balay #endif 65048e59246SSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 651d9d09a02SSatish Balay else { 65248e59246SSatish Balay col = data + idxn[j]%bs; 65398dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 654d6de1c52SSatish Balay } 655d6de1c52SSatish Balay } 656d6de1c52SSatish Balay } 657f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 658d6de1c52SSatish Balay } 6593a40ed3dSBarry Smith PetscFunctionReturn(0); 660d6de1c52SSatish Balay } 661d6de1c52SSatish Balay 662dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 663d6de1c52SSatish Balay { 664d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 665d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 666dfbe8321SBarry Smith PetscErrorCode ierr; 667d0f46423SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 668329f5518SBarry Smith PetscReal sum = 0.0; 6693eda8832SBarry Smith MatScalar *v; 670d6de1c52SSatish Balay 671d64ed03dSBarry Smith PetscFunctionBegin; 672d6de1c52SSatish Balay if (baij->size == 1) { 673064f8208SBarry Smith ierr = 
MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 674d6de1c52SSatish Balay } else { 675d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 676d6de1c52SSatish Balay v = amat->a; 6778a62d963SHong Zhang nz = amat->nz*bs2; 6788a62d963SHong Zhang for (i=0; i<nz; i++) { 679329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 680d6de1c52SSatish Balay } 681d6de1c52SSatish Balay v = bmat->a; 6828a62d963SHong Zhang nz = bmat->nz*bs2; 6838a62d963SHong Zhang for (i=0; i<nz; i++) { 684329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 685d6de1c52SSatish Balay } 686b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6878f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 6888a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 6898a62d963SHong Zhang PetscReal *tmp,*tmp2; 690899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 6918f8f2f0dSBarry Smith ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 692857a15f1SBarry Smith ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 6938a62d963SHong Zhang v = amat->a; jj = amat->j; 6948a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 6958a62d963SHong Zhang for (j=0; j<bs; j++) { 6968a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 6978a62d963SHong Zhang for (row=0; row<bs; row++) { 6988a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6998a62d963SHong Zhang } 7008a62d963SHong Zhang } 7018a62d963SHong Zhang jj++; 7028a62d963SHong Zhang } 7038a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7048a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7058a62d963SHong Zhang for (j=0; j<bs; j++) { 7068a62d963SHong Zhang col = bs*garray[*jj] + j; 7078a62d963SHong Zhang for (row=0; row<bs; row++) { 7088a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7098a62d963SHong Zhang } 7108a62d963SHong Zhang } 7118a62d963SHong Zhang jj++; 7128a62d963SHong Zhang } 713b2566f29SBarry Smith ierr = 
MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7148a62d963SHong Zhang *nrm = 0.0; 715d0f46423SBarry Smith for (j=0; j<mat->cmap->N; j++) { 7168a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 7178a62d963SHong Zhang } 718857a15f1SBarry Smith ierr = PetscFree(tmp);CHKERRQ(ierr); 719857a15f1SBarry Smith ierr = PetscFree(tmp2);CHKERRQ(ierr); 7208a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 721577dd1f9SKris Buschelman PetscReal *sums; 722785e854fSJed Brown ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 7238a62d963SHong Zhang sum = 0.0; 7248a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 7258a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 7268a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 7278a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 7288a62d963SHong Zhang for (i=0; i<nz; i++) { 7298a62d963SHong Zhang for (col=0; col<bs; col++) { 7308a62d963SHong Zhang for (row=0; row<bs; row++) { 7318a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 7328a62d963SHong Zhang } 7338a62d963SHong Zhang } 7348a62d963SHong Zhang } 7358a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 7368a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 7378a62d963SHong Zhang for (i=0; i<nz; i++) { 7388a62d963SHong Zhang for (col=0; col<bs; col++) { 7398a62d963SHong Zhang for (row=0; row<bs; row++) { 7408a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 7418a62d963SHong Zhang } 7428a62d963SHong Zhang } 7438a62d963SHong Zhang } 7448a62d963SHong Zhang for (row=0; row<bs; row++) { 7458a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 7468a62d963SHong Zhang } 7478a62d963SHong Zhang } 748b2566f29SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 749577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 750ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm 
yet"); 751d64ed03dSBarry Smith } 7523a40ed3dSBarry Smith PetscFunctionReturn(0); 753d6de1c52SSatish Balay } 75457b952d6SSatish Balay 755fef45726SSatish Balay /* 756fef45726SSatish Balay Creates the hash table, and sets the table 757fef45726SSatish Balay This table is created only once. 758fef45726SSatish Balay If new entried need to be added to the matrix 759fef45726SSatish Balay then the hash table has to be destroyed and 760fef45726SSatish Balay recreated. 761fef45726SSatish Balay */ 762dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 763596b8d2eSBarry Smith { 764596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 765596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 766596b8d2eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 767b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7686849ba73SBarry Smith PetscErrorCode ierr; 769fca92195SBarry Smith PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 770899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 771b24ad042SBarry Smith PetscInt *HT,key; 7723eda8832SBarry Smith MatScalar **HD; 773329f5518SBarry Smith PetscReal tmp; 7746cf91177SBarry Smith #if defined(PETSC_USE_INFO) 775b24ad042SBarry Smith PetscInt ct=0,max=0; 7764a15367fSSatish Balay #endif 777fef45726SSatish Balay 778d64ed03dSBarry Smith PetscFunctionBegin; 779fca92195SBarry Smith if (baij->ht) PetscFunctionReturn(0); 780fef45726SSatish Balay 781fca92195SBarry Smith baij->ht_size = (PetscInt)(factor*nz); 782fca92195SBarry Smith ht_size = baij->ht_size; 7830bdbc534SSatish Balay 784fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7851795a4d1SJed Brown ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 786b9e4cc15SSatish Balay HD = baij->hd; 787a07cd24cSSatish Balay HT = baij->ht; 788b9e4cc15SSatish Balay 789596b8d2eSBarry Smith /* Loop Over A */ 
7900bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 791596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 7920bdbc534SSatish Balay row = i+rstart; 7930bdbc534SSatish Balay col = aj[j]+cstart; 794596b8d2eSBarry Smith 795187ce0cbSSatish Balay key = row*Nbs + col + 1; 796fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 797fca92195SBarry Smith for (k=0; k<ht_size; k++) { 798fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 799fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 800fca92195SBarry Smith HD[(h1+k)%ht_size] = a->a + j*bs2; 801596b8d2eSBarry Smith break; 8026cf91177SBarry Smith #if defined(PETSC_USE_INFO) 803187ce0cbSSatish Balay } else { 804187ce0cbSSatish Balay ct++; 805187ce0cbSSatish Balay #endif 806596b8d2eSBarry Smith } 807187ce0cbSSatish Balay } 8086cf91177SBarry Smith #if defined(PETSC_USE_INFO) 809187ce0cbSSatish Balay if (k> max) max = k; 810187ce0cbSSatish Balay #endif 811596b8d2eSBarry Smith } 812596b8d2eSBarry Smith } 813596b8d2eSBarry Smith /* Loop Over B */ 8140bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 815596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8160bdbc534SSatish Balay row = i+rstart; 8170bdbc534SSatish Balay col = garray[bj[j]]; 818187ce0cbSSatish Balay key = row*Nbs + col + 1; 819fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 820fca92195SBarry Smith for (k=0; k<ht_size; k++) { 821fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 822fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 823fca92195SBarry Smith HD[(h1+k)%ht_size] = b->a + j*bs2; 824596b8d2eSBarry Smith break; 8256cf91177SBarry Smith #if defined(PETSC_USE_INFO) 826187ce0cbSSatish Balay } else { 827187ce0cbSSatish Balay ct++; 828187ce0cbSSatish Balay #endif 829596b8d2eSBarry Smith } 830187ce0cbSSatish Balay } 8316cf91177SBarry Smith #if defined(PETSC_USE_INFO) 832187ce0cbSSatish Balay if (k> max) max = k; 833187ce0cbSSatish Balay #endif 834596b8d2eSBarry Smith } 835596b8d2eSBarry Smith } 836596b8d2eSBarry Smith 837596b8d2eSBarry Smith /* Print Summary */ 8386cf91177SBarry 
Smith #if defined(PETSC_USE_INFO) 839fca92195SBarry Smith for (i=0,j=0; i<ht_size; i++) { 84026fbe8dcSKarl Rupp if (HT[i]) j++; 841c38d4ed2SBarry Smith } 8421e2582c4SBarry Smith ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 843187ce0cbSSatish Balay #endif 8443a40ed3dSBarry Smith PetscFunctionReturn(0); 845596b8d2eSBarry Smith } 84657b952d6SSatish Balay 847dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 848bbb85fb3SSatish Balay { 849bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 850dfbe8321SBarry Smith PetscErrorCode ierr; 851b24ad042SBarry Smith PetscInt nstash,reallocs; 852bbb85fb3SSatish Balay 853bbb85fb3SSatish Balay PetscFunctionBegin; 85426fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 855bbb85fb3SSatish Balay 856d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 8571e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 8588798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8591e2582c4SBarry Smith ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 86046680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 8611e2582c4SBarry Smith ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 862bbb85fb3SSatish Balay PetscFunctionReturn(0); 863bbb85fb3SSatish Balay } 864bbb85fb3SSatish Balay 865dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 866bbb85fb3SSatish Balay { 867bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 86891c97fd4SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 8696849ba73SBarry Smith PetscErrorCode ierr; 870b24ad042SBarry Smith 
PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 871e44c0bd4SBarry Smith PetscInt *row,*col; 872ace3abfcSBarry Smith PetscBool r1,r2,r3,other_disassembled; 8733eda8832SBarry Smith MatScalar *val; 874b24ad042SBarry Smith PetscMPIInt n; 875bbb85fb3SSatish Balay 876bbb85fb3SSatish Balay PetscFunctionBegin; 8775fd66863SKarl Rupp /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 8784cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 879a2d1c673SSatish Balay while (1) { 8808798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 881a2d1c673SSatish Balay if (!flg) break; 882a2d1c673SSatish Balay 883bbb85fb3SSatish Balay for (i=0; i<n;) { 884bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 88526fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 88626fbe8dcSKarl Rupp if (row[j] != rstart) break; 88726fbe8dcSKarl Rupp } 888bbb85fb3SSatish Balay if (j < n) ncols = j-i; 889bbb85fb3SSatish Balay else ncols = n-i; 890bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 8914b4eb8d3SJed Brown ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 892bbb85fb3SSatish Balay i = j; 893bbb85fb3SSatish Balay } 894bbb85fb3SSatish Balay } 8958798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 896a2d1c673SSatish Balay /* Now process the block-stash. 
Since the values are stashed column-oriented, 897a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 898a2d1c673SSatish Balay restore the original flags */ 899a2d1c673SSatish Balay r1 = baij->roworiented; 900a2d1c673SSatish Balay r2 = a->roworiented; 90191c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 90226fbe8dcSKarl Rupp 9037c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9047c922b88SBarry Smith a->roworiented = PETSC_FALSE; 90526fbe8dcSKarl Rupp 90691c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 907a2d1c673SSatish Balay while (1) { 9088798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 909a2d1c673SSatish Balay if (!flg) break; 910a2d1c673SSatish Balay 911a2d1c673SSatish Balay for (i=0; i<n;) { 912a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 91326fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 91426fbe8dcSKarl Rupp if (row[j] != rstart) break; 91526fbe8dcSKarl Rupp } 916a2d1c673SSatish Balay if (j < n) ncols = j-i; 917a2d1c673SSatish Balay else ncols = n-i; 9184b4eb8d3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 919a2d1c673SSatish Balay i = j; 920a2d1c673SSatish Balay } 921a2d1c673SSatish Balay } 9228798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 92326fbe8dcSKarl Rupp 924a2d1c673SSatish Balay baij->roworiented = r1; 925a2d1c673SSatish Balay a->roworiented = r2; 92626fbe8dcSKarl Rupp 92791c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 928bbb85fb3SSatish Balay } 929bbb85fb3SSatish Balay 930bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 931bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 932bbb85fb3SSatish Balay 933bbb85fb3SSatish Balay /* 
determine if any processor has disassembled, if so we must 934bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 935bbb85fb3SSatish Balay /* 936bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 937bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 938bbb85fb3SSatish Balay */ 939bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 940b2566f29SBarry Smith ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 941bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 942ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 943bbb85fb3SSatish Balay } 944bbb85fb3SSatish Balay } 945bbb85fb3SSatish Balay 946bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 947bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 948bbb85fb3SSatish Balay } 949bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 950bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 951bbb85fb3SSatish Balay 9526cf91177SBarry Smith #if defined(PETSC_USE_INFO) 953bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 954abf3b562SBarry Smith ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 95526fbe8dcSKarl Rupp 956bbb85fb3SSatish Balay baij->ht_total_ct = 0; 957bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 958bbb85fb3SSatish Balay } 959bbb85fb3SSatish Balay #endif 960bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 961bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 96226fbe8dcSKarl Rupp 963bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 964bbb85fb3SSatish Balay 
mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 965bbb85fb3SSatish Balay } 966bbb85fb3SSatish Balay 967fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 96826fbe8dcSKarl Rupp 969f4259b30SLisandro Dalcin baij->rowvalues = NULL; 9704f9cfa9eSBarry Smith 9714f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 9724f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 973e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 974b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 975e56f5c9eSBarry Smith } 976bbb85fb3SSatish Balay PetscFunctionReturn(0); 977bbb85fb3SSatish Balay } 97857b952d6SSatish Balay 9797da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 9809804daf3SBarry Smith #include <petscdraw.h> 9816849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 98257b952d6SSatish Balay { 98357b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 984dfbe8321SBarry Smith PetscErrorCode ierr; 9857da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 986d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 987ace3abfcSBarry Smith PetscBool iascii,isdraw; 988b0a32e0cSBarry Smith PetscViewer sviewer; 989f3ef73ceSBarry Smith PetscViewerFormat format; 99057b952d6SSatish Balay 991d64ed03dSBarry Smith PetscFunctionBegin; 992251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 993251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 99432077d6dSBarry Smith if (iascii) { 995b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 
996456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 9974e220ebcSLois Curfman McInnes MatInfo info; 998ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 999d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 10001575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1001b1e9c6f1SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n", 1002b1e9c6f1SBarry Smith rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1003d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1004e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1005d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1006e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1007b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 10081575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 100907d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 101057b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10113a40ed3dSBarry Smith PetscFunctionReturn(0); 1012fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 101377431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 10143a40ed3dSBarry Smith PetscFunctionReturn(0); 101504929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 101604929863SHong Zhang PetscFunctionReturn(0); 101757b952d6SSatish Balay } 101857b952d6SSatish Balay } 101957b952d6SSatish Balay 
10200f5bd95cSBarry Smith if (isdraw) { 1021b0a32e0cSBarry Smith PetscDraw draw; 1022ace3abfcSBarry Smith PetscBool isnull; 1023b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 102445f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 102545f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 102657b952d6SSatish Balay } 102757b952d6SSatish Balay 10287da1fb6eSBarry Smith { 102957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 103057b952d6SSatish Balay Mat A; 103157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1032d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10333eda8832SBarry Smith MatScalar *a; 10343e219373SBarry Smith const char *matname; 103557b952d6SSatish Balay 1036f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1037f204ca49SKris Buschelman /* Perhaps this should be the type of mat? */ 1038ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 103957b952d6SSatish Balay if (!rank) { 1040f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1041d64ed03dSBarry Smith } else { 1042f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 104357b952d6SSatish Balay } 1044f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 10450298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 10462b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 10473bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 104857b952d6SSatish Balay 104957b952d6SSatish Balay /* copy over the A part */ 105057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 105157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1052785e854fSJed Brown ierr = 
PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 105357b952d6SSatish Balay 105457b952d6SSatish Balay for (i=0; i<mbs; i++) { 1055899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 105626fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 105757b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1058899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 105957b952d6SSatish Balay for (k=0; k<bs; k++) { 106097e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1061cee3aa6bSSatish Balay col++; a += bs; 106257b952d6SSatish Balay } 106357b952d6SSatish Balay } 106457b952d6SSatish Balay } 106557b952d6SSatish Balay /* copy over the B part */ 106657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 106757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 106857b952d6SSatish Balay for (i=0; i<mbs; i++) { 1069899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 107026fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 107157b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 107257b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 107357b952d6SSatish Balay for (k=0; k<bs; k++) { 107497e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1075cee3aa6bSSatish Balay col++; a += bs; 107657b952d6SSatish Balay } 107757b952d6SSatish Balay } 107857b952d6SSatish Balay } 1079606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 10806d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10816d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 108255843e3eSBarry Smith /* 108355843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1084b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 108555843e3eSBarry Smith */ 10863f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1087ade3a672SBarry 
Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 10883e219373SBarry Smith if (!rank) { 1089ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 10907da1fb6eSBarry Smith ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 109157b952d6SSatish Balay } 10923f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 10931575c14dSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 10946bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 109557b952d6SSatish Balay } 10963a40ed3dSBarry Smith PetscFunctionReturn(0); 109757b952d6SSatish Balay } 109857b952d6SSatish Balay 1099618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1100b51a4376SLisandro Dalcin PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1101660746e0SBarry Smith { 1102b51a4376SLisandro Dalcin Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 1103b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1104b51a4376SLisandro Dalcin Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1105b51a4376SLisandro Dalcin const PetscInt *garray = aij->garray; 1106b51a4376SLisandro Dalcin PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1107b51a4376SLisandro Dalcin PetscInt *rowlens,*colidxs; 1108b51a4376SLisandro Dalcin PetscScalar *matvals; 1109660746e0SBarry Smith PetscErrorCode ierr; 1110660746e0SBarry Smith 1111660746e0SBarry Smith PetscFunctionBegin; 1112b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1113b51a4376SLisandro Dalcin 1114b51a4376SLisandro Dalcin M = mat->rmap->N; 1115b51a4376SLisandro Dalcin N = mat->cmap->N; 1116b51a4376SLisandro Dalcin m = mat->rmap->n; 1117b51a4376SLisandro Dalcin rs = mat->rmap->rstart; 1118b51a4376SLisandro Dalcin cs = mat->cmap->rstart; 1119b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1120b51a4376SLisandro Dalcin nz = bs*bs*(A->nz + B->nz); 
1121b51a4376SLisandro Dalcin 1122b51a4376SLisandro Dalcin /* write matrix header */ 1123660746e0SBarry Smith header[0] = MAT_FILE_CLASSID; 1124b51a4376SLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 1125ffc4695bSBarry Smith ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1126b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1127660746e0SBarry Smith 1128b51a4376SLisandro Dalcin /* fill in and store row lengths */ 1129b51a4376SLisandro Dalcin ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1130b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1131b51a4376SLisandro Dalcin for (j=0; j<bs; j++) 1132b51a4376SLisandro Dalcin rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1133b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1134b51a4376SLisandro Dalcin ierr = PetscFree(rowlens);CHKERRQ(ierr); 1135660746e0SBarry Smith 1136b51a4376SLisandro Dalcin /* fill in and store column indices */ 1137b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1138b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) { 1139b51a4376SLisandro Dalcin for (k=0; k<bs; k++) { 1140b51a4376SLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1141b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs/bs) break; 1142b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1143b51a4376SLisandro Dalcin colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1144660746e0SBarry Smith } 1145b51a4376SLisandro Dalcin for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1146b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1147b51a4376SLisandro Dalcin colidxs[cnt++] = bs*A->j[ja] + l + cs; 1148b51a4376SLisandro Dalcin for (; jb<B->i[i+1]; jb++) 1149b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1150b51a4376SLisandro Dalcin colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1151660746e0SBarry Smith } 1152660746e0SBarry Smith } 1153660746e0SBarry 
Smith if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1154b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1155b51a4376SLisandro Dalcin ierr = PetscFree(colidxs);CHKERRQ(ierr); 1156660746e0SBarry Smith 1157b51a4376SLisandro Dalcin /* fill in and store nonzero values */ 1158b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1159b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) { 1160b51a4376SLisandro Dalcin for (k=0; k<bs; k++) { 1161b51a4376SLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1162b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs/bs) break; 1163b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1164b51a4376SLisandro Dalcin matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1165660746e0SBarry Smith } 1166b51a4376SLisandro Dalcin for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1167b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1168b51a4376SLisandro Dalcin matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1169b51a4376SLisandro Dalcin for (; jb<B->i[i+1]; jb++) 1170b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1171d21b9a37SPierre Jolivet matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1172660746e0SBarry Smith } 1173b51a4376SLisandro Dalcin } 1174b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1175b51a4376SLisandro Dalcin ierr = PetscFree(matvals);CHKERRQ(ierr); 1176660746e0SBarry Smith 1177b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 1178b51a4376SLisandro Dalcin ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1179660746e0SBarry Smith PetscFunctionReturn(0); 1180660746e0SBarry Smith } 1181660746e0SBarry Smith 1182dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 118357b952d6SSatish Balay { 1184dfbe8321SBarry Smith PetscErrorCode ierr; 1185ace3abfcSBarry Smith 
PetscBool iascii,isdraw,issocket,isbinary; 118657b952d6SSatish Balay 1187d64ed03dSBarry Smith PetscFunctionBegin; 1188251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1189251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1190251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1191251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1192660746e0SBarry Smith if (iascii || isdraw || issocket) { 11937b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1194660746e0SBarry Smith } else if (isbinary) { 1195660746e0SBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 119657b952d6SSatish Balay } 11973a40ed3dSBarry Smith PetscFunctionReturn(0); 119857b952d6SSatish Balay } 119957b952d6SSatish Balay 1200dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 120179bdfe76SSatish Balay { 120279bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1203dfbe8321SBarry Smith PetscErrorCode ierr; 120479bdfe76SSatish Balay 1205d64ed03dSBarry Smith PetscFunctionBegin; 1206aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1207d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 120879bdfe76SSatish Balay #endif 12098798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12108798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 12116bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 12126bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1213aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 12146bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 121548e59246SSatish Balay #else 121605b42c5fSBarry Smith ierr = 
PetscFree(baij->colmap);CHKERRQ(ierr); 121748e59246SSatish Balay #endif 121805b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 12196bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 12206bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1221fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 122205b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1223fca92195SBarry Smith ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1224899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1225bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1226901853e0SKris Buschelman 1227f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1228bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1229bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1230bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1231bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1232bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1233bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1234bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1235bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 12367ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 12377ea3e4caSstefano_zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 
12387ea3e4caSstefano_zampini #endif 1239c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 12403a40ed3dSBarry Smith PetscFunctionReturn(0); 124179bdfe76SSatish Balay } 124279bdfe76SSatish Balay 1243dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1244cee3aa6bSSatish Balay { 1245cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1246dfbe8321SBarry Smith PetscErrorCode ierr; 1247b24ad042SBarry Smith PetscInt nt; 1248cee3aa6bSSatish Balay 1249d64ed03dSBarry Smith PetscFunctionBegin; 1250e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1251e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1252e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1253e7e72b3dSBarry Smith if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1254ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1255f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1256ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1257f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 12583a40ed3dSBarry Smith PetscFunctionReturn(0); 1259cee3aa6bSSatish Balay } 1260cee3aa6bSSatish Balay 1261dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1262cee3aa6bSSatish Balay { 1263cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1264dfbe8321SBarry Smith PetscErrorCode ierr; 1265d64ed03dSBarry Smith 1266d64ed03dSBarry Smith PetscFunctionBegin; 1267ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1268f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 
1269ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1270f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12713a40ed3dSBarry Smith PetscFunctionReturn(0); 1272cee3aa6bSSatish Balay } 1273cee3aa6bSSatish Balay 1274dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1275cee3aa6bSSatish Balay { 1276cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1277dfbe8321SBarry Smith PetscErrorCode ierr; 1278cee3aa6bSSatish Balay 1279d64ed03dSBarry Smith PetscFunctionBegin; 1280cee3aa6bSSatish Balay /* do nondiagonal part */ 12817c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1282cee3aa6bSSatish Balay /* do local part */ 12837c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1284e4a140f6SJunchao Zhang /* add partial results together */ 1285ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1286ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 12873a40ed3dSBarry Smith PetscFunctionReturn(0); 1288cee3aa6bSSatish Balay } 1289cee3aa6bSSatish Balay 1290dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1291cee3aa6bSSatish Balay { 1292cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1293dfbe8321SBarry Smith PetscErrorCode ierr; 1294cee3aa6bSSatish Balay 1295d64ed03dSBarry Smith PetscFunctionBegin; 1296cee3aa6bSSatish Balay /* do nondiagonal part */ 12977c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1298cee3aa6bSSatish Balay /* do local part */ 12997c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1300e4a140f6SJunchao Zhang /* add partial results together */ 1301e4a140f6SJunchao Zhang ierr = 
VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1302ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 13033a40ed3dSBarry Smith PetscFunctionReturn(0); 1304cee3aa6bSSatish Balay } 1305cee3aa6bSSatish Balay 1306cee3aa6bSSatish Balay /* 1307cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1308cee3aa6bSSatish Balay diagonal block 1309cee3aa6bSSatish Balay */ 1310dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1311cee3aa6bSSatish Balay { 1312cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1313dfbe8321SBarry Smith PetscErrorCode ierr; 1314d64ed03dSBarry Smith 1315d64ed03dSBarry Smith PetscFunctionBegin; 1316e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13173a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13183a40ed3dSBarry Smith PetscFunctionReturn(0); 1319cee3aa6bSSatish Balay } 1320cee3aa6bSSatish Balay 1321f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1322cee3aa6bSSatish Balay { 1323cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1324dfbe8321SBarry Smith PetscErrorCode ierr; 1325d64ed03dSBarry Smith 1326d64ed03dSBarry Smith PetscFunctionBegin; 1327f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1328f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13293a40ed3dSBarry Smith PetscFunctionReturn(0); 1330cee3aa6bSSatish Balay } 1331026e39d0SSatish Balay 1332b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1333acdf5bf4SSatish Balay { 1334acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 133587828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13366849ba73SBarry Smith PetscErrorCode ierr; 1337d0f46423SBarry 
Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1338d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1339899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1340acdf5bf4SSatish Balay 1341d64ed03dSBarry Smith PetscFunctionBegin; 1342e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1343e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1344acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1345acdf5bf4SSatish Balay 1346acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1347acdf5bf4SSatish Balay /* 1348acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 1349acdf5bf4SSatish Balay */ 1350acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1351b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1352bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1353acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 135426fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1355acdf5bf4SSatish Balay } 1356dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1357acdf5bf4SSatish Balay } 1358d9d09a02SSatish Balay lrow = row - brstart; 1359acdf5bf4SSatish Balay 1360acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1361f4259b30SLisandro Dalcin if (!v) {pvA = NULL; pvB = NULL;} 1362f4259b30SLisandro Dalcin if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1363f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1364f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1365acdf5bf4SSatish Balay nztot = nzA + nzB; 1366acdf5bf4SSatish Balay 1367acdf5bf4SSatish Balay cmap = mat->garray; 1368acdf5bf4SSatish 
Balay if (v || idx) { 1369acdf5bf4SSatish Balay if (nztot) { 1370acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1371b24ad042SBarry Smith PetscInt imark = -1; 1372acdf5bf4SSatish Balay if (v) { 1373acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1374acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1375d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1376acdf5bf4SSatish Balay else break; 1377acdf5bf4SSatish Balay } 1378acdf5bf4SSatish Balay imark = i; 1379acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1380acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1381acdf5bf4SSatish Balay } 1382acdf5bf4SSatish Balay if (idx) { 1383acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1384acdf5bf4SSatish Balay if (imark > -1) { 1385acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1386bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1387acdf5bf4SSatish Balay } 1388acdf5bf4SSatish Balay } else { 1389acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 139026fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1391acdf5bf4SSatish Balay else break; 1392acdf5bf4SSatish Balay } 1393acdf5bf4SSatish Balay imark = i; 1394acdf5bf4SSatish Balay } 1395d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1396d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1397acdf5bf4SSatish Balay } 1398d64ed03dSBarry Smith } else { 1399f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1400f4259b30SLisandro Dalcin if (v) *v = NULL; 1401d212a18eSSatish Balay } 1402acdf5bf4SSatish Balay } 1403acdf5bf4SSatish Balay *nz = nztot; 1404f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1405f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14063a40ed3dSBarry Smith 
PetscFunctionReturn(0); 1407acdf5bf4SSatish Balay } 1408acdf5bf4SSatish Balay 1409b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1410acdf5bf4SSatish Balay { 1411acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1412d64ed03dSBarry Smith 1413d64ed03dSBarry Smith PetscFunctionBegin; 1414e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1415acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14163a40ed3dSBarry Smith PetscFunctionReturn(0); 1417acdf5bf4SSatish Balay } 1418acdf5bf4SSatish Balay 1419dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 142058667388SSatish Balay { 142158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1422dfbe8321SBarry Smith PetscErrorCode ierr; 1423d64ed03dSBarry Smith 1424d64ed03dSBarry Smith PetscFunctionBegin; 142558667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 142658667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14273a40ed3dSBarry Smith PetscFunctionReturn(0); 142858667388SSatish Balay } 14290ac07820SSatish Balay 1430dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14310ac07820SSatish Balay { 14324e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14334e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1434dfbe8321SBarry Smith PetscErrorCode ierr; 14353966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 14360ac07820SSatish Balay 1437d64ed03dSBarry Smith PetscFunctionBegin; 1438d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 143926fbe8dcSKarl Rupp 14404e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 144126fbe8dcSKarl Rupp 14420e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1443de87f314SBarry Smith isend[3] = info->memory; isend[4] = 
info->mallocs; 144426fbe8dcSKarl Rupp 14454e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 144626fbe8dcSKarl Rupp 14470e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1448de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 144926fbe8dcSKarl Rupp 14500ac07820SSatish Balay if (flag == MAT_LOCAL) { 14514e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14524e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14534e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14544e220ebcSLois Curfman McInnes info->memory = isend[3]; 14554e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14560ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 14573966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 145826fbe8dcSKarl Rupp 14594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14640ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 14653966268fSBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 146626fbe8dcSKarl Rupp 14674e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14684e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14694e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14704e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14714e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1472ce94432eSBarry Smith } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 14734e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* 
no parallel LU/ILU/Cholesky */ 14744e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14754e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14763a40ed3dSBarry Smith PetscFunctionReturn(0); 14770ac07820SSatish Balay } 14780ac07820SSatish Balay 1479ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 148058667388SSatish Balay { 148158667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1482dfbe8321SBarry Smith PetscErrorCode ierr; 148358667388SSatish Balay 1484d64ed03dSBarry Smith PetscFunctionBegin; 148512c028f9SKris Buschelman switch (op) { 1486512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 148712c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 148828b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1489a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 149012c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 149143674050SBarry Smith MatCheckPreallocated(A,1); 14924e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 14934e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 149412c028f9SKris Buschelman break; 149512c028f9SKris Buschelman case MAT_ROW_ORIENTED: 149643674050SBarry Smith MatCheckPreallocated(A,1); 14974e0d8c25SBarry Smith a->roworiented = flg; 149826fbe8dcSKarl Rupp 14994e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15004e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 150112c028f9SKris Buschelman break; 15028c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1503071fcb05SBarry Smith case MAT_SORTED_FULL: 1504290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 150512c028f9SKris Buschelman break; 150612c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15074e0d8c25SBarry Smith a->donotstash = flg; 150812c028f9SKris Buschelman break; 150912c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15104e0d8c25SBarry Smith a->ht_flag = flg; 
1511abf3b562SBarry Smith a->ht_fact = 1.39; 151212c028f9SKris Buschelman break; 151377e54ba9SKris Buschelman case MAT_SYMMETRIC: 151477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15152188ac68SBarry Smith case MAT_HERMITIAN: 1516c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 15172188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 151843674050SBarry Smith MatCheckPreallocated(A,1); 15194e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 152077e54ba9SKris Buschelman break; 152112c028f9SKris Buschelman default: 1522ce94432eSBarry Smith SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1523d64ed03dSBarry Smith } 15243a40ed3dSBarry Smith PetscFunctionReturn(0); 152558667388SSatish Balay } 152658667388SSatish Balay 1527fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 15280ac07820SSatish Balay { 15290ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15300ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15310ac07820SSatish Balay Mat B; 1532dfbe8321SBarry Smith PetscErrorCode ierr; 1533d0f46423SBarry Smith PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1534d0f46423SBarry Smith PetscInt bs=A->rmap->bs,mbs=baij->mbs; 15353eda8832SBarry Smith MatScalar *a; 15360ac07820SSatish Balay 1537d64ed03dSBarry Smith PetscFunctionBegin; 1538cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1539ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1540d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 15417adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 15422e72b8d9SBarry Smith /* Do not know preallocation information, but must set block size */ 15430298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1544fc4dec0aSBarry Smith } else { 1545fc4dec0aSBarry Smith B = 
*matout; 1546fc4dec0aSBarry Smith } 15470ac07820SSatish Balay 15480ac07820SSatish Balay /* copy over the A part */ 15490ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15500ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1551785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 15520ac07820SSatish Balay 15530ac07820SSatish Balay for (i=0; i<mbs; i++) { 1554899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 155526fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 15560ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1557899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 15580ac07820SSatish Balay for (k=0; k<bs; k++) { 155997e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 156026fbe8dcSKarl Rupp 15610ac07820SSatish Balay col++; a += bs; 15620ac07820SSatish Balay } 15630ac07820SSatish Balay } 15640ac07820SSatish Balay } 15650ac07820SSatish Balay /* copy over the B part */ 15660ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 15670ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15680ac07820SSatish Balay for (i=0; i<mbs; i++) { 1569899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 157026fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 15710ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15720ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15730ac07820SSatish Balay for (k=0; k<bs; k++) { 157497e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 157526fbe8dcSKarl Rupp col++; 157626fbe8dcSKarl Rupp a += bs; 15770ac07820SSatish Balay } 15780ac07820SSatish Balay } 15790ac07820SSatish Balay } 1580606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 15810ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15820ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15830ac07820SSatish Balay 1584cf37664fSBarry 
Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 158526fbe8dcSKarl Rupp else { 158628be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 15870ac07820SSatish Balay } 15883a40ed3dSBarry Smith PetscFunctionReturn(0); 15890ac07820SSatish Balay } 15900e95ebc0SSatish Balay 1591dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 15920e95ebc0SSatish Balay { 159336c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 159436c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1595dfbe8321SBarry Smith PetscErrorCode ierr; 1596b24ad042SBarry Smith PetscInt s1,s2,s3; 15970e95ebc0SSatish Balay 1598d64ed03dSBarry Smith PetscFunctionBegin; 159936c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 160036c4a09eSSatish Balay if (rr) { 160136c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1602e32f2f54SBarry Smith if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 160336c4a09eSSatish Balay /* Overlap communication with computation. 
*/ 1604ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 160536c4a09eSSatish Balay } 16060e95ebc0SSatish Balay if (ll) { 16070e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1608e32f2f54SBarry Smith if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 16090298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 16100e95ebc0SSatish Balay } 161136c4a09eSSatish Balay /* scale the diagonal block */ 161236c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 161336c4a09eSSatish Balay 161436c4a09eSSatish Balay if (rr) { 161536c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1616ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16170298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 161836c4a09eSSatish Balay } 16193a40ed3dSBarry Smith PetscFunctionReturn(0); 16200e95ebc0SSatish Balay } 16210e95ebc0SSatish Balay 16222b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 16230ac07820SSatish Balay { 16240ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 162565a92638SMatthew G. Knepley PetscInt *lrows; 16266e520ac8SStefano Zampini PetscInt r, len; 162794342113SStefano Zampini PetscBool cong; 16286849ba73SBarry Smith PetscErrorCode ierr; 16290ac07820SSatish Balay 1630d64ed03dSBarry Smith PetscFunctionBegin; 16316e520ac8SStefano Zampini /* get locally owned rows */ 16326e520ac8SStefano Zampini ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 163397b48c8fSBarry Smith /* fix right hand side if needed */ 163497b48c8fSBarry Smith if (x && b) { 163565a92638SMatthew G. Knepley const PetscScalar *xx; 163665a92638SMatthew G. Knepley PetscScalar *bb; 163765a92638SMatthew G. 
Knepley 163897b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 163997b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 164065a92638SMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 164197b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 164297b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 164397b48c8fSBarry Smith } 164497b48c8fSBarry Smith 16450ac07820SSatish Balay /* actually zap the local rows */ 164672dacd9aSBarry Smith /* 164772dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1648a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 164972dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 165072dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 165172dacd9aSBarry Smith 165272dacd9aSBarry Smith */ 16539c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1654a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 165594342113SStefano Zampini ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 165694342113SStefano Zampini if ((diag != 0.0) && cong) { 1657a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1658f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1659f4259b30SLisandro Dalcin ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1660e7e72b3dSBarry Smith if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1661512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 166265a92638SMatthew G. Knepley for (r = 0; r < len; ++r) { 166365a92638SMatthew G. 
Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 1664f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1665a07cd24cSSatish Balay } 1666a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1667a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16689c957beeSSatish Balay } else { 1669a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1670a07cd24cSSatish Balay } 1671606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 16724f9cfa9eSBarry Smith 16734f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 16744f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1675e56f5c9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1676b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1677e56f5c9eSBarry Smith } 16783a40ed3dSBarry Smith PetscFunctionReturn(0); 16790ac07820SSatish Balay } 168072dacd9aSBarry Smith 16816f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 16826f0a72daSMatthew G. Knepley { 16836f0a72daSMatthew G. Knepley Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16846f0a72daSMatthew G. Knepley PetscErrorCode ierr; 1685131c27b5Sprj- PetscMPIInt n = A->rmap->n,p = 0; 1686131c27b5Sprj- PetscInt i,j,k,r,len = 0,row,col,count; 16876f0a72daSMatthew G. Knepley PetscInt *lrows,*owners = A->rmap->range; 16886f0a72daSMatthew G. Knepley PetscSFNode *rrows; 16896f0a72daSMatthew G. Knepley PetscSF sf; 16906f0a72daSMatthew G. Knepley const PetscScalar *xx; 16916f0a72daSMatthew G. Knepley PetscScalar *bb,*mask; 16926f0a72daSMatthew G. Knepley Vec xmask,lmask; 16936f0a72daSMatthew G. Knepley Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 16946f0a72daSMatthew G. 
Knepley PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 16956f0a72daSMatthew G. Knepley PetscScalar *aa; 16966f0a72daSMatthew G. Knepley 16976f0a72daSMatthew G. Knepley PetscFunctionBegin; 16986f0a72daSMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 16996f0a72daSMatthew G. Knepley ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 17006f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 17016f0a72daSMatthew G. Knepley ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 17026f0a72daSMatthew G. Knepley for (r = 0; r < N; ++r) { 17036f0a72daSMatthew G. Knepley const PetscInt idx = rows[r]; 17045ba17502SJed Brown if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 17055ba17502SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 17065ba17502SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 17075ba17502SJed Brown } 17086f0a72daSMatthew G. Knepley rrows[r].rank = p; 17096f0a72daSMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 17106f0a72daSMatthew G. Knepley } 17116f0a72daSMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 17126f0a72daSMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 17136f0a72daSMatthew G. Knepley /* Collect flags for rows to be zeroed */ 17146f0a72daSMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 17156f0a72daSMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 17166f0a72daSMatthew G. Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 17176f0a72daSMatthew G. Knepley /* Compress and put in row numbers */ 17186f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 17196f0a72daSMatthew G. 
Knepley /* zero diagonal part of matrix */ 17206f0a72daSMatthew G. Knepley ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 17216f0a72daSMatthew G. Knepley /* handle off diagonal part of matrix */ 17222a7a6963SBarry Smith ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 17236f0a72daSMatthew G. Knepley ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 17246f0a72daSMatthew G. Knepley ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 17256f0a72daSMatthew G. Knepley for (i=0; i<len; i++) bb[lrows[i]] = 1; 17266f0a72daSMatthew G. Knepley ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 17276f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17286f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17296f0a72daSMatthew G. Knepley ierr = VecDestroy(&xmask);CHKERRQ(ierr); 17306f0a72daSMatthew G. Knepley if (x) { 17316f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17326f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17336f0a72daSMatthew G. Knepley ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 17346f0a72daSMatthew G. Knepley ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 17356f0a72daSMatthew G. Knepley } 17366f0a72daSMatthew G. Knepley ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 17376f0a72daSMatthew G. Knepley /* remove zeroed rows of off diagonal matrix */ 17386f0a72daSMatthew G. Knepley for (i = 0; i < len; ++i) { 17396f0a72daSMatthew G. Knepley row = lrows[i]; 17406f0a72daSMatthew G. Knepley count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 17416f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 17426f0a72daSMatthew G. Knepley for (k = 0; k < count; ++k) { 17436f0a72daSMatthew G. Knepley aa[0] = 0.0; 17446f0a72daSMatthew G. Knepley aa += bs; 17456f0a72daSMatthew G. 
Knepley } 17466f0a72daSMatthew G. Knepley } 17476f0a72daSMatthew G. Knepley /* loop over all elements of off process part of matrix zeroing removed columns*/ 17486f0a72daSMatthew G. Knepley for (i = 0; i < l->B->rmap->N; ++i) { 17496f0a72daSMatthew G. Knepley row = i/bs; 17506f0a72daSMatthew G. Knepley for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 17516f0a72daSMatthew G. Knepley for (k = 0; k < bs; ++k) { 17526f0a72daSMatthew G. Knepley col = bs*baij->j[j] + k; 17536f0a72daSMatthew G. Knepley if (PetscAbsScalar(mask[col])) { 17546f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 175589ae1891SBarry Smith if (x) bb[i] -= aa[0]*xx[col]; 17566f0a72daSMatthew G. Knepley aa[0] = 0.0; 17576f0a72daSMatthew G. Knepley } 17586f0a72daSMatthew G. Knepley } 17596f0a72daSMatthew G. Knepley } 17606f0a72daSMatthew G. Knepley } 17616f0a72daSMatthew G. Knepley if (x) { 17626f0a72daSMatthew G. Knepley ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 17636f0a72daSMatthew G. Knepley ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 17646f0a72daSMatthew G. Knepley } 17656f0a72daSMatthew G. Knepley ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 17666f0a72daSMatthew G. Knepley ierr = VecDestroy(&lmask);CHKERRQ(ierr); 17676f0a72daSMatthew G. Knepley ierr = PetscFree(lrows);CHKERRQ(ierr); 17684f9cfa9eSBarry Smith 17694f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 17704f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 17714f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1772b2566f29SBarry Smith ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 17734f9cfa9eSBarry Smith } 17746f0a72daSMatthew G. Knepley PetscFunctionReturn(0); 17756f0a72daSMatthew G. Knepley } 17766f0a72daSMatthew G. 
Knepley 1777dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1778bb5a7306SBarry Smith { 1779bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1780dfbe8321SBarry Smith PetscErrorCode ierr; 1781d64ed03dSBarry Smith 1782d64ed03dSBarry Smith PetscFunctionBegin; 1783bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 17843a40ed3dSBarry Smith PetscFunctionReturn(0); 1785bb5a7306SBarry Smith } 1786bb5a7306SBarry Smith 17876849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 17880ac07820SSatish Balay 1789ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 17907fc3c18eSBarry Smith { 17917fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 17927fc3c18eSBarry Smith Mat a,b,c,d; 1793ace3abfcSBarry Smith PetscBool flg; 1794dfbe8321SBarry Smith PetscErrorCode ierr; 17957fc3c18eSBarry Smith 17967fc3c18eSBarry Smith PetscFunctionBegin; 17977fc3c18eSBarry Smith a = matA->A; b = matA->B; 17987fc3c18eSBarry Smith c = matB->A; d = matB->B; 17997fc3c18eSBarry Smith 18007fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1801abc0a331SBarry Smith if (flg) { 18027fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18037fc3c18eSBarry Smith } 1804b2566f29SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 18057fc3c18eSBarry Smith PetscFunctionReturn(0); 18067fc3c18eSBarry Smith } 18077fc3c18eSBarry Smith 18083c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 18093c896bc6SHong Zhang { 18103c896bc6SHong Zhang PetscErrorCode ierr; 18113c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 18123c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 18133c896bc6SHong Zhang 18143c896bc6SHong Zhang PetscFunctionBegin; 18153c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible 
for fast copy. */ 18163c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 18173c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 18183c896bc6SHong Zhang } else { 18193c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 18203c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 18213c896bc6SHong Zhang } 1822cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 18233c896bc6SHong Zhang PetscFunctionReturn(0); 18243c896bc6SHong Zhang } 1825273d9f13SBarry Smith 18264994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1827273d9f13SBarry Smith { 1828dfbe8321SBarry Smith PetscErrorCode ierr; 1829273d9f13SBarry Smith 1830273d9f13SBarry Smith PetscFunctionBegin; 1831f4259b30SLisandro Dalcin ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1832273d9f13SBarry Smith PetscFunctionReturn(0); 1833273d9f13SBarry Smith } 1834273d9f13SBarry Smith 18354de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 18364de5dceeSHong Zhang { 1837001ddc4fSHong Zhang PetscErrorCode ierr; 1838001ddc4fSHong Zhang PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 18394de5dceeSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 18404de5dceeSHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 18414de5dceeSHong Zhang 18424de5dceeSHong Zhang PetscFunctionBegin; 1843001ddc4fSHong Zhang ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 18444de5dceeSHong Zhang PetscFunctionReturn(0); 18454de5dceeSHong Zhang } 18464de5dceeSHong Zhang 18474fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 18484fe895cdSHong Zhang { 18494fe895cdSHong Zhang PetscErrorCode ierr; 18504fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 18514fe895cdSHong Zhang PetscBLASInt 
bnz,one=1; 18524fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 1853b31f67cfSBarry Smith PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 18544fe895cdSHong Zhang 18554fe895cdSHong Zhang PetscFunctionBegin; 18564fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 18574fe895cdSHong Zhang PetscScalar alpha = a; 18584fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->A->data; 18594fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->A->data; 1860b31f67cfSBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 18618b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 18624fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->B->data; 18634fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->B->data; 1864b31f67cfSBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 18658b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1866a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1867ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1868ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 18694fe895cdSHong Zhang } else { 18704de5dceeSHong Zhang Mat B; 18714de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 18724de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 18734de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 18744de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 18754de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 18764de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 18774de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 18784de5dceeSHong Zhang ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 18794de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 18804de5dceeSHong Zhang 
ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 18814de5dceeSHong Zhang ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 18824de5dceeSHong Zhang /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 18834de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 188428be2f97SBarry Smith ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 18854de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 18864de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 18874fe895cdSHong Zhang } 18884fe895cdSHong Zhang PetscFunctionReturn(0); 18894fe895cdSHong Zhang } 18904fe895cdSHong Zhang 18912726fb6dSPierre Jolivet PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 18922726fb6dSPierre Jolivet { 18932726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 18942726fb6dSPierre Jolivet PetscErrorCode ierr; 18952726fb6dSPierre Jolivet Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 18962726fb6dSPierre Jolivet 18972726fb6dSPierre Jolivet PetscFunctionBegin; 18982726fb6dSPierre Jolivet ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr); 18992726fb6dSPierre Jolivet ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr); 19002726fb6dSPierre Jolivet #else 19012726fb6dSPierre Jolivet PetscFunctionBegin; 19022726fb6dSPierre Jolivet #endif 19032726fb6dSPierre Jolivet PetscFunctionReturn(0); 19042726fb6dSPierre Jolivet } 19052726fb6dSPierre Jolivet 190699cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 190799cafbc1SBarry Smith { 190899cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 190999cafbc1SBarry Smith PetscErrorCode ierr; 191099cafbc1SBarry Smith 191199cafbc1SBarry Smith PetscFunctionBegin; 191299cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 191399cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 191499cafbc1SBarry Smith PetscFunctionReturn(0); 191599cafbc1SBarry Smith } 191699cafbc1SBarry Smith 191799cafbc1SBarry Smith 
PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 191899cafbc1SBarry Smith { 191999cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 192099cafbc1SBarry Smith PetscErrorCode ierr; 192199cafbc1SBarry Smith 192299cafbc1SBarry Smith PetscFunctionBegin; 192399cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 192499cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 192599cafbc1SBarry Smith PetscFunctionReturn(0); 192699cafbc1SBarry Smith } 192799cafbc1SBarry Smith 19287dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 19294aa3045dSJed Brown { 19304aa3045dSJed Brown PetscErrorCode ierr; 19314aa3045dSJed Brown IS iscol_local; 19324aa3045dSJed Brown PetscInt csize; 19334aa3045dSJed Brown 19344aa3045dSJed Brown PetscFunctionBegin; 19354aa3045dSJed Brown ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1936b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 1937b79d0421SJed Brown ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 1938e32f2f54SBarry Smith if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1939b79d0421SJed Brown } else { 19404aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1941b79d0421SJed Brown } 19427dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1943b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 1944b79d0421SJed Brown ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 19456bf464f9SBarry Smith ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1946b79d0421SJed Brown } 19474aa3045dSJed Brown PetscFunctionReturn(0); 19484aa3045dSJed Brown } 194917df9f7cSHong Zhang 195082094794SBarry Smith /* 195182094794SBarry Smith Not great since it makes two copies of the submatrix, first an SeqBAIJ 
195282094794SBarry Smith in local and then by concatenating the local matrices the end result. 19537dae84e0SHong Zhang Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 19548f46ffcaSHong Zhang This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 195582094794SBarry Smith */ 19567dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 195782094794SBarry Smith { 195882094794SBarry Smith PetscErrorCode ierr; 195982094794SBarry Smith PetscMPIInt rank,size; 196082094794SBarry Smith PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1961c9ffca76SHong Zhang PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 196229dcf524SDmitry Karpeev Mat M,Mreuse; 196382094794SBarry Smith MatScalar *vwork,*aa; 1964ce94432eSBarry Smith MPI_Comm comm; 196529dcf524SDmitry Karpeev IS isrow_new, iscol_new; 196682094794SBarry Smith Mat_SeqBAIJ *aij; 196782094794SBarry Smith 196882094794SBarry Smith PetscFunctionBegin; 1969ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1970ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 1971ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 197229dcf524SDmitry Karpeev /* The compression and expansion should be avoided. 
Doesn't point 197329dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 197429dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 197529dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 197682094794SBarry Smith 197782094794SBarry Smith if (call == MAT_REUSE_MATRIX) { 197882094794SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 1979e32f2f54SBarry Smith if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 19807dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 198182094794SBarry Smith } else { 19827dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 198382094794SBarry Smith } 198429dcf524SDmitry Karpeev ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 198529dcf524SDmitry Karpeev ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 198682094794SBarry Smith /* 198782094794SBarry Smith m - number of local rows 198882094794SBarry Smith n - number of columns (same on all processors) 198982094794SBarry Smith rstart - first row in new global matrix generated 199082094794SBarry Smith */ 199182094794SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 199282094794SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 199382094794SBarry Smith m = m/bs; 199482094794SBarry Smith n = n/bs; 199582094794SBarry Smith 199682094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 199782094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 199882094794SBarry Smith ii = aij->i; 199982094794SBarry Smith jj = aij->j; 200082094794SBarry Smith 200182094794SBarry Smith /* 200282094794SBarry Smith Determine the number of non-zeros in the 
diagonal and off-diagonal 200382094794SBarry Smith portions of the matrix in order to do correct preallocation 200482094794SBarry Smith */ 200582094794SBarry Smith 200682094794SBarry Smith /* first get start and end of "diagonal" columns */ 200782094794SBarry Smith if (csize == PETSC_DECIDE) { 200882094794SBarry Smith ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 200982094794SBarry Smith if (mglobal == n*bs) { /* square matrix */ 201082094794SBarry Smith nlocal = m; 201182094794SBarry Smith } else { 201282094794SBarry Smith nlocal = n/size + ((n % size) > rank); 201382094794SBarry Smith } 201482094794SBarry Smith } else { 201582094794SBarry Smith nlocal = csize/bs; 201682094794SBarry Smith } 201755b25c41SPierre Jolivet ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 201882094794SBarry Smith rstart = rend - nlocal; 201965e19b50SBarry Smith if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 202082094794SBarry Smith 202182094794SBarry Smith /* next, compute all the lengths */ 2022dcca6d9dSJed Brown ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 202382094794SBarry Smith for (i=0; i<m; i++) { 202482094794SBarry Smith jend = ii[i+1] - ii[i]; 202582094794SBarry Smith olen = 0; 202682094794SBarry Smith dlen = 0; 202782094794SBarry Smith for (j=0; j<jend; j++) { 202882094794SBarry Smith if (*jj < rstart || *jj >= rend) olen++; 202982094794SBarry Smith else dlen++; 203082094794SBarry Smith jj++; 203182094794SBarry Smith } 203282094794SBarry Smith olens[i] = olen; 203382094794SBarry Smith dlens[i] = dlen; 203482094794SBarry Smith } 203582094794SBarry Smith ierr = MatCreate(comm,&M);CHKERRQ(ierr); 203682094794SBarry Smith ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 203782094794SBarry Smith ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 203882094794SBarry Smith ierr = 
MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 20398f46ffcaSHong Zhang ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2040eb9baa12SBarry Smith ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 204182094794SBarry Smith } else { 204282094794SBarry Smith PetscInt ml,nl; 204382094794SBarry Smith 204482094794SBarry Smith M = *newmat; 204582094794SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2046e32f2f54SBarry Smith if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 204782094794SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 204882094794SBarry Smith /* 204982094794SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 205082094794SBarry Smith rather than the slower MatSetValues(). 205182094794SBarry Smith */ 205282094794SBarry Smith M->was_assembled = PETSC_TRUE; 205382094794SBarry Smith M->assembled = PETSC_FALSE; 205482094794SBarry Smith } 205582094794SBarry Smith ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 205682094794SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 205782094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 205882094794SBarry Smith ii = aij->i; 205982094794SBarry Smith jj = aij->j; 206082094794SBarry Smith aa = aij->a; 206182094794SBarry Smith for (i=0; i<m; i++) { 206282094794SBarry Smith row = rstart/bs + i; 206382094794SBarry Smith nz = ii[i+1] - ii[i]; 206482094794SBarry Smith cwork = jj; jj += nz; 206575f6568bSJed Brown vwork = aa; aa += nz*bs*bs; 206682094794SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 206782094794SBarry Smith } 206882094794SBarry Smith 206982094794SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 207082094794SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 207182094794SBarry Smith *newmat = M; 207282094794SBarry Smith 
207382094794SBarry Smith /* save submatrix used in processor for next request */ 207482094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 207582094794SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 207682094794SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 207782094794SBarry Smith } 207882094794SBarry Smith PetscFunctionReturn(0); 207982094794SBarry Smith } 208082094794SBarry Smith 208182094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 208282094794SBarry Smith { 208382094794SBarry Smith MPI_Comm comm,pcomm; 2084a0a83eb5SRémi Lacroix PetscInt clocal_size,nrows; 208582094794SBarry Smith const PetscInt *rows; 2086dbf0e21dSBarry Smith PetscMPIInt size; 2087a0a83eb5SRémi Lacroix IS crowp,lcolp; 208882094794SBarry Smith PetscErrorCode ierr; 208982094794SBarry Smith 209082094794SBarry Smith PetscFunctionBegin; 209182094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 209282094794SBarry Smith /* make a collective version of 'rowp' */ 209382094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 209482094794SBarry Smith if (pcomm==comm) { 209582094794SBarry Smith crowp = rowp; 209682094794SBarry Smith } else { 209782094794SBarry Smith ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 209882094794SBarry Smith ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 209970b3c8c7SBarry Smith ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 210082094794SBarry Smith ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 210182094794SBarry Smith } 2102a0a83eb5SRémi Lacroix ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2103a0a83eb5SRémi Lacroix /* make a local version of 'colp' */ 210482094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2105ffc4695bSBarry Smith ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr); 2106dbf0e21dSBarry Smith if (size==1) { 
210782094794SBarry Smith lcolp = colp; 210882094794SBarry Smith } else { 210975f6568bSJed Brown ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 211082094794SBarry Smith } 2111dbf0e21dSBarry Smith ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 211275f6568bSJed Brown /* now we just get the submatrix */ 21137afc1a8bSJed Brown ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 21147dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2115a0a83eb5SRémi Lacroix /* clean up */ 2116a0a83eb5SRémi Lacroix if (pcomm!=comm) { 2117a0a83eb5SRémi Lacroix ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2118a0a83eb5SRémi Lacroix } 2119dbf0e21dSBarry Smith if (size>1) { 21206bf464f9SBarry Smith ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 212182094794SBarry Smith } 212282094794SBarry Smith PetscFunctionReturn(0); 212382094794SBarry Smith } 212482094794SBarry Smith 21257087cfbeSBarry Smith PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 21268c7482ecSBarry Smith { 21278c7482ecSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 21288c7482ecSBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 21298c7482ecSBarry Smith 21308c7482ecSBarry Smith PetscFunctionBegin; 213126fbe8dcSKarl Rupp if (nghosts) *nghosts = B->nbs; 213226fbe8dcSKarl Rupp if (ghosts) *ghosts = baij->garray; 21338c7482ecSBarry Smith PetscFunctionReturn(0); 21348c7482ecSBarry Smith } 21358c7482ecSBarry Smith 2136d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2137f6d58c54SBarry Smith { 2138f6d58c54SBarry Smith Mat B; 2139f6d58c54SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2140f6d58c54SBarry Smith Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2141f6d58c54SBarry Smith Mat_SeqAIJ *b; 2142f6d58c54SBarry Smith PetscErrorCode ierr; 2143f4259b30SLisandro Dalcin PetscMPIInt size,rank,*recvcounts = NULL,*displs = NULL; 2144f6d58c54SBarry 
Smith PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2145f6d58c54SBarry Smith PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2146f6d58c54SBarry Smith 2147f6d58c54SBarry Smith PetscFunctionBegin; 2148ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 2149ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 2150f6d58c54SBarry Smith 2151f6d58c54SBarry Smith /* ---------------------------------------------------------------- 2152f6d58c54SBarry Smith Tell every processor the number of nonzeros per row 2153f6d58c54SBarry Smith */ 2154854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2155f6d58c54SBarry Smith for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2156f6d58c54SBarry Smith lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2157f6d58c54SBarry Smith } 2158785e854fSJed Brown ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2159f6d58c54SBarry Smith displs = recvcounts + size; 2160f6d58c54SBarry Smith for (i=0; i<size; i++) { 2161f6d58c54SBarry Smith recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2162f6d58c54SBarry Smith displs[i] = A->rmap->range[i]/bs; 2163f6d58c54SBarry Smith } 2164f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2165ffc4695bSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2166f6d58c54SBarry Smith #else 21673d3eaba7SBarry Smith sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2168ffc4695bSBarry Smith ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2169f6d58c54SBarry Smith #endif 2170f6d58c54SBarry Smith /* --------------------------------------------------------------- 
2171f6d58c54SBarry Smith Create the sequential matrix of the same type as the local block diagonal 2172f6d58c54SBarry Smith */ 2173f6d58c54SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2174f6d58c54SBarry Smith ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2175f6d58c54SBarry Smith ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2176f6d58c54SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2177f6d58c54SBarry Smith b = (Mat_SeqAIJ*)B->data; 2178f6d58c54SBarry Smith 2179f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2180f6d58c54SBarry Smith Copy my part of matrix column indices over 2181f6d58c54SBarry Smith */ 2182f6d58c54SBarry Smith sendcount = ad->nz + bd->nz; 2183f6d58c54SBarry Smith jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2184f6d58c54SBarry Smith a_jsendbuf = ad->j; 2185f6d58c54SBarry Smith b_jsendbuf = bd->j; 2186f6d58c54SBarry Smith n = A->rmap->rend/bs - A->rmap->rstart/bs; 2187f6d58c54SBarry Smith cnt = 0; 2188f6d58c54SBarry Smith for (i=0; i<n; i++) { 2189f6d58c54SBarry Smith 2190f6d58c54SBarry Smith /* put in lower diagonal portion */ 2191f6d58c54SBarry Smith m = bd->i[i+1] - bd->i[i]; 2192f6d58c54SBarry Smith while (m > 0) { 2193f6d58c54SBarry Smith /* is it above diagonal (in bd (compressed) numbering) */ 2194f6d58c54SBarry Smith if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2195f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2196f6d58c54SBarry Smith m--; 2197f6d58c54SBarry Smith } 2198f6d58c54SBarry Smith 2199f6d58c54SBarry Smith /* put in diagonal portion */ 2200f6d58c54SBarry Smith for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2201f6d58c54SBarry Smith jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2202f6d58c54SBarry Smith } 2203f6d58c54SBarry Smith 2204f6d58c54SBarry Smith /* put in upper diagonal portion */ 2205f6d58c54SBarry Smith while (m-- > 0) { 2206f6d58c54SBarry Smith jsendbuf[cnt++] = 
garray[*b_jsendbuf++]; 2207f6d58c54SBarry Smith } 2208f6d58c54SBarry Smith } 2209e32f2f54SBarry Smith if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2210f6d58c54SBarry Smith 2211f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2212f6d58c54SBarry Smith Gather all column indices to all processors 2213f6d58c54SBarry Smith */ 2214f6d58c54SBarry Smith for (i=0; i<size; i++) { 2215f6d58c54SBarry Smith recvcounts[i] = 0; 2216f6d58c54SBarry Smith for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2217f6d58c54SBarry Smith recvcounts[i] += lens[j]; 2218f6d58c54SBarry Smith } 2219f6d58c54SBarry Smith } 2220f6d58c54SBarry Smith displs[0] = 0; 2221f6d58c54SBarry Smith for (i=1; i<size; i++) { 2222f6d58c54SBarry Smith displs[i] = displs[i-1] + recvcounts[i-1]; 2223f6d58c54SBarry Smith } 2224f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2225ffc4695bSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2226f6d58c54SBarry Smith #else 2227ffc4695bSBarry Smith ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2228f6d58c54SBarry Smith #endif 2229f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2230f6d58c54SBarry Smith Assemble the matrix into useable form (note numerical values not yet set) 2231f6d58c54SBarry Smith */ 2232f6d58c54SBarry Smith /* set the b->ilen (length of each row) values */ 2233580bdb30SBarry Smith ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2234f6d58c54SBarry Smith /* set the b->i indices */ 2235f6d58c54SBarry Smith b->i[0] = 0; 2236f6d58c54SBarry Smith for (i=1; i<=A->rmap->N/bs; i++) { 2237f6d58c54SBarry Smith b->i[i] = b->i[i-1] + lens[i-1]; 2238f6d58c54SBarry Smith } 
2239f6d58c54SBarry Smith ierr = PetscFree(lens);CHKERRQ(ierr); 2240f6d58c54SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2241f6d58c54SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2242f6d58c54SBarry Smith ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2243f6d58c54SBarry Smith 2244f6d58c54SBarry Smith if (A->symmetric) { 2245f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2246f6d58c54SBarry Smith } else if (A->hermitian) { 2247f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2248f6d58c54SBarry Smith } else if (A->structurally_symmetric) { 2249f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2250f6d58c54SBarry Smith } 2251f6d58c54SBarry Smith *newmat = B; 2252f6d58c54SBarry Smith PetscFunctionReturn(0); 2253f6d58c54SBarry Smith } 2254f6d58c54SBarry Smith 2255b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2256b1a666ecSBarry Smith { 2257b1a666ecSBarry Smith Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2258b1a666ecSBarry Smith PetscErrorCode ierr; 2259f4259b30SLisandro Dalcin Vec bb1 = NULL; 2260b1a666ecSBarry Smith 2261b1a666ecSBarry Smith PetscFunctionBegin; 2262b1a666ecSBarry Smith if (flag == SOR_APPLY_UPPER) { 2263b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2264b1a666ecSBarry Smith PetscFunctionReturn(0); 2265b1a666ecSBarry Smith } 2266b1a666ecSBarry Smith 22674e980039SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 22684e980039SJed Brown ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 22694e980039SJed Brown } 22704e980039SJed Brown 2271b1a666ecSBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2272b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2273b1a666ecSBarry Smith ierr = 
(*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2274b1a666ecSBarry Smith its--; 2275b1a666ecSBarry Smith } 2276b1a666ecSBarry Smith 2277b1a666ecSBarry Smith while (its--) { 2278b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2279b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2280b1a666ecSBarry Smith 2281b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2282b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2283b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2284b1a666ecSBarry Smith 2285b1a666ecSBarry Smith /* local sweep */ 2286b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2287b1a666ecSBarry Smith } 2288b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2289b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2290b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2291b1a666ecSBarry Smith its--; 2292b1a666ecSBarry Smith } 2293b1a666ecSBarry Smith while (its--) { 2294b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2295b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2296b1a666ecSBarry Smith 2297b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2298b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2299b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2300b1a666ecSBarry Smith 2301b1a666ecSBarry Smith /* local sweep */ 2302b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2303b1a666ecSBarry Smith } 2304b1a666ecSBarry Smith } else if (flag & 
SOR_LOCAL_BACKWARD_SWEEP) { 2305b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2306b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2307b1a666ecSBarry Smith its--; 2308b1a666ecSBarry Smith } 2309b1a666ecSBarry Smith while (its--) { 2310b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2311b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2312b1a666ecSBarry Smith 2313b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2314b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2315b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2316b1a666ecSBarry Smith 2317b1a666ecSBarry Smith /* local sweep */ 2318b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2319b1a666ecSBarry Smith } 2320ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2321b1a666ecSBarry Smith 23226bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2323b1a666ecSBarry Smith PetscFunctionReturn(0); 2324b1a666ecSBarry Smith } 2325b1a666ecSBarry Smith 232647f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 232747f7623dSRémi Lacroix { 232847f7623dSRémi Lacroix PetscErrorCode ierr; 232947f7623dSRémi Lacroix Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 233047f7623dSRémi Lacroix PetscInt N,i,*garray = aij->garray; 233147f7623dSRémi Lacroix PetscInt ib,jb,bs = A->rmap->bs; 233247f7623dSRémi Lacroix Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 233347f7623dSRémi Lacroix MatScalar *a_val = a_aij->a; 233447f7623dSRémi Lacroix Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 233547f7623dSRémi Lacroix MatScalar *b_val = b_aij->a; 233647f7623dSRémi Lacroix PetscReal 
*work; 233747f7623dSRémi Lacroix 233847f7623dSRémi Lacroix PetscFunctionBegin; 233947f7623dSRémi Lacroix ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 23401795a4d1SJed Brown ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 234147f7623dSRémi Lacroix if (type == NORM_2) { 234247f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 234347f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 234447f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 234547f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 234647f7623dSRémi Lacroix a_val++; 234747f7623dSRémi Lacroix } 234847f7623dSRémi Lacroix } 234947f7623dSRémi Lacroix } 235047f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 235147f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 235247f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 235347f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 235447f7623dSRémi Lacroix b_val++; 235547f7623dSRémi Lacroix } 235647f7623dSRémi Lacroix } 235747f7623dSRémi Lacroix } 235847f7623dSRémi Lacroix } else if (type == NORM_1) { 235947f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 236047f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 236147f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 236247f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 236347f7623dSRémi Lacroix a_val++; 236447f7623dSRémi Lacroix } 236547f7623dSRémi Lacroix } 236647f7623dSRémi Lacroix } 236747f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 236847f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 236947f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 237047f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 237147f7623dSRémi Lacroix b_val++; 237247f7623dSRémi Lacroix } 237347f7623dSRémi Lacroix } 237447f7623dSRémi Lacroix } 237547f7623dSRémi Lacroix } else if (type == 
NORM_INFINITY) { 237647f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 237747f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 237847f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 237947f7623dSRémi Lacroix int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 238047f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 238147f7623dSRémi Lacroix a_val++; 238247f7623dSRémi Lacroix } 238347f7623dSRémi Lacroix } 238447f7623dSRémi Lacroix } 238547f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 238647f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 238747f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 238847f7623dSRémi Lacroix int col = garray[b_aij->j[i]] * bs + jb; 238947f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 239047f7623dSRémi Lacroix b_val++; 239147f7623dSRémi Lacroix } 239247f7623dSRémi Lacroix } 239347f7623dSRémi Lacroix } 239447f7623dSRémi Lacroix } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 239547f7623dSRémi Lacroix if (type == NORM_INFINITY) { 2396b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 239747f7623dSRémi Lacroix } else { 2398b2566f29SBarry Smith ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 239947f7623dSRémi Lacroix } 240047f7623dSRémi Lacroix ierr = PetscFree(work);CHKERRQ(ierr); 240147f7623dSRémi Lacroix if (type == NORM_2) { 240247f7623dSRémi Lacroix for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 240347f7623dSRémi Lacroix } 240447f7623dSRémi Lacroix PetscFunctionReturn(0); 240547f7623dSRémi Lacroix } 240647f7623dSRémi Lacroix 2407713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2408bbead8a2SBarry Smith { 2409bbead8a2SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2410bbead8a2SBarry Smith PetscErrorCode ierr; 
2411bbead8a2SBarry Smith 2412bbead8a2SBarry Smith PetscFunctionBegin; 2413bbead8a2SBarry Smith ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 24147b6c816cSBarry Smith A->factorerrortype = a->A->factorerrortype; 24157b6c816cSBarry Smith A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 24167b6c816cSBarry Smith A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2417bbead8a2SBarry Smith PetscFunctionReturn(0); 2418bbead8a2SBarry Smith } 2419bbead8a2SBarry Smith 24207d68702bSBarry Smith PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 24217d68702bSBarry Smith { 24227d68702bSBarry Smith PetscErrorCode ierr; 24237d68702bSBarry Smith Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 24246f33a894SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 24257d68702bSBarry Smith 24267d68702bSBarry Smith PetscFunctionBegin; 24276f33a894SBarry Smith if (!Y->preallocated) { 24287d68702bSBarry Smith ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 24296f33a894SBarry Smith } else if (!aij->nz) { 2430b83222d8SBarry Smith PetscInt nonew = aij->nonew; 24316f33a894SBarry Smith ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2432b83222d8SBarry Smith aij->nonew = nonew; 24337d68702bSBarry Smith } 24347d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 24357d68702bSBarry Smith PetscFunctionReturn(0); 24367d68702bSBarry Smith } 24378c7482ecSBarry Smith 24383b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 24393b49f96aSBarry Smith { 24403b49f96aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 24413b49f96aSBarry Smith PetscErrorCode ierr; 24423b49f96aSBarry Smith 24433b49f96aSBarry Smith PetscFunctionBegin; 24443b49f96aSBarry Smith if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 24453b49f96aSBarry Smith ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 
24463b49f96aSBarry Smith if (d) { 24473b49f96aSBarry Smith PetscInt rstart; 24483b49f96aSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 24493b49f96aSBarry Smith *d += rstart/A->rmap->bs; 24503b49f96aSBarry Smith 24513b49f96aSBarry Smith } 24523b49f96aSBarry Smith PetscFunctionReturn(0); 24533b49f96aSBarry Smith } 24543b49f96aSBarry Smith 2455a5b7ff6bSBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2456a5b7ff6bSBarry Smith { 2457a5b7ff6bSBarry Smith PetscFunctionBegin; 2458a5b7ff6bSBarry Smith *a = ((Mat_MPIBAIJ*)A->data)->A; 2459a5b7ff6bSBarry Smith PetscFunctionReturn(0); 2460a5b7ff6bSBarry Smith } 2461a5b7ff6bSBarry Smith 246279bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 24633964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2464cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 2465cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 2466cc2dc46cSBarry Smith MatMult_MPIBAIJ, 246797304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 24687c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 24697c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 2470f4259b30SLisandro Dalcin NULL, 2471f4259b30SLisandro Dalcin NULL, 2472f4259b30SLisandro Dalcin NULL, 2473f4259b30SLisandro Dalcin /*10*/ NULL, 2474f4259b30SLisandro Dalcin NULL, 2475f4259b30SLisandro Dalcin NULL, 2476b1a666ecSBarry Smith MatSOR_MPIBAIJ, 2477cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 247897304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 24797fc3c18eSBarry Smith MatEqual_MPIBAIJ, 2480cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 2481cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 2482cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 248397304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 2484cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2485cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2486cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 2487d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIBAIJ, 2488f4259b30SLisandro Dalcin NULL, 
2489f4259b30SLisandro Dalcin NULL, 2490f4259b30SLisandro Dalcin NULL, 2491f4259b30SLisandro Dalcin NULL, 24924994cf47SJed Brown /*29*/ MatSetUp_MPIBAIJ, 2493f4259b30SLisandro Dalcin NULL, 2494f4259b30SLisandro Dalcin NULL, 2495a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPIBAIJ, 2496f4259b30SLisandro Dalcin NULL, 2497d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIBAIJ, 2498f4259b30SLisandro Dalcin NULL, 2499f4259b30SLisandro Dalcin NULL, 2500f4259b30SLisandro Dalcin NULL, 2501f4259b30SLisandro Dalcin NULL, 2502d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIBAIJ, 25037dae84e0SHong Zhang MatCreateSubMatrices_MPIBAIJ, 2504cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 2505cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 25063c896bc6SHong Zhang MatCopy_MPIBAIJ, 2507f4259b30SLisandro Dalcin /*44*/ NULL, 2508cc2dc46cSBarry Smith MatScale_MPIBAIJ, 25097d68702bSBarry Smith MatShift_MPIBAIJ, 2510f4259b30SLisandro Dalcin NULL, 25116f0a72daSMatthew G. Knepley MatZeroRowsColumns_MPIBAIJ, 2512f4259b30SLisandro Dalcin /*49*/ NULL, 2513f4259b30SLisandro Dalcin NULL, 2514f4259b30SLisandro Dalcin NULL, 2515f4259b30SLisandro Dalcin NULL, 2516f4259b30SLisandro Dalcin NULL, 251793dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2518f4259b30SLisandro Dalcin NULL, 2519cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 252082094794SBarry Smith MatPermute_MPIBAIJ, 2521cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 25227dae84e0SHong Zhang /*59*/ MatCreateSubMatrix_MPIBAIJ, 2523f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2524f14a1c24SBarry Smith MatView_MPIBAIJ, 2525f4259b30SLisandro Dalcin NULL, 2526f4259b30SLisandro Dalcin NULL, 2527f4259b30SLisandro Dalcin /*64*/ NULL, 2528f4259b30SLisandro Dalcin NULL, 2529f4259b30SLisandro Dalcin NULL, 2530f4259b30SLisandro Dalcin NULL, 2531f4259b30SLisandro Dalcin NULL, 2532d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2533f4259b30SLisandro Dalcin NULL, 2534f4259b30SLisandro Dalcin NULL, 2535f4259b30SLisandro Dalcin NULL, 2536f4259b30SLisandro 
Dalcin NULL, 2537f4259b30SLisandro Dalcin /*74*/ NULL, 2538f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2539f4259b30SLisandro Dalcin NULL, 2540f4259b30SLisandro Dalcin NULL, 2541f4259b30SLisandro Dalcin NULL, 2542f4259b30SLisandro Dalcin /*79*/ NULL, 2543f4259b30SLisandro Dalcin NULL, 2544f4259b30SLisandro Dalcin NULL, 2545f4259b30SLisandro Dalcin NULL, 25465bba2384SShri Abhyankar MatLoad_MPIBAIJ, 2547f4259b30SLisandro Dalcin /*84*/ NULL, 2548f4259b30SLisandro Dalcin NULL, 2549f4259b30SLisandro Dalcin NULL, 2550f4259b30SLisandro Dalcin NULL, 2551f4259b30SLisandro Dalcin NULL, 2552f4259b30SLisandro Dalcin /*89*/ NULL, 2553f4259b30SLisandro Dalcin NULL, 2554f4259b30SLisandro Dalcin NULL, 2555f4259b30SLisandro Dalcin NULL, 2556f4259b30SLisandro Dalcin NULL, 2557f4259b30SLisandro Dalcin /*94*/ NULL, 2558f4259b30SLisandro Dalcin NULL, 2559f4259b30SLisandro Dalcin NULL, 2560f4259b30SLisandro Dalcin NULL, 2561f4259b30SLisandro Dalcin NULL, 2562f4259b30SLisandro Dalcin /*99*/ NULL, 2563f4259b30SLisandro Dalcin NULL, 2564f4259b30SLisandro Dalcin NULL, 25652726fb6dSPierre Jolivet MatConjugate_MPIBAIJ, 2566f4259b30SLisandro Dalcin NULL, 2567f4259b30SLisandro Dalcin /*104*/NULL, 256899cafbc1SBarry Smith MatRealPart_MPIBAIJ, 25698c7482ecSBarry Smith MatImaginaryPart_MPIBAIJ, 2570f4259b30SLisandro Dalcin NULL, 2571f4259b30SLisandro Dalcin NULL, 2572f4259b30SLisandro Dalcin /*109*/NULL, 2573f4259b30SLisandro Dalcin NULL, 2574f4259b30SLisandro Dalcin NULL, 2575f4259b30SLisandro Dalcin NULL, 25763b49f96aSBarry Smith MatMissingDiagonal_MPIBAIJ, 2577d1adec66SJed Brown /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2578f4259b30SLisandro Dalcin NULL, 25794683f7a4SShri Abhyankar MatGetGhosts_MPIBAIJ, 2580f4259b30SLisandro Dalcin NULL, 2581f4259b30SLisandro Dalcin NULL, 2582f4259b30SLisandro Dalcin /*119*/NULL, 2583f4259b30SLisandro Dalcin NULL, 2584f4259b30SLisandro Dalcin NULL, 2585f4259b30SLisandro Dalcin NULL, 2586e8271787SHong Zhang MatGetMultiProcBlock_MPIBAIJ, 2587f4259b30SLisandro 
Dalcin /*124*/NULL, 258847f7623dSRémi Lacroix MatGetColumnNorms_MPIBAIJ, 25893964eb88SJed Brown MatInvertBlockDiagonal_MPIBAIJ, 2590f4259b30SLisandro Dalcin NULL, 2591f4259b30SLisandro Dalcin NULL, 2592f4259b30SLisandro Dalcin /*129*/ NULL, 2593f4259b30SLisandro Dalcin NULL, 2594f4259b30SLisandro Dalcin NULL, 2595f4259b30SLisandro Dalcin NULL, 2596f4259b30SLisandro Dalcin NULL, 2597f4259b30SLisandro Dalcin /*134*/ NULL, 2598f4259b30SLisandro Dalcin NULL, 2599f4259b30SLisandro Dalcin NULL, 2600f4259b30SLisandro Dalcin NULL, 2601f4259b30SLisandro Dalcin NULL, 260246533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2603f4259b30SLisandro Dalcin NULL, 2604f4259b30SLisandro Dalcin NULL, 2605bdf6f3fcSHong Zhang MatFDColoringSetUp_MPIXAIJ, 2606f4259b30SLisandro Dalcin NULL, 2607bdf6f3fcSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 26088c7482ecSBarry Smith }; 260979bdfe76SSatish Balay 261079bdfe76SSatish Balay 2611cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2612c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2613d94109b8SHong Zhang 2614cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2615aac34f13SBarry Smith { 2616b8d659d7SLisandro Dalcin PetscInt m,rstart,cstart,cend; 2617f4259b30SLisandro Dalcin PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2618f4259b30SLisandro Dalcin const PetscInt *JJ =NULL; 2619f4259b30SLisandro Dalcin PetscScalar *values=NULL; 2620d47bf9aaSJed Brown PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2621aac34f13SBarry Smith PetscErrorCode ierr; 26223bd0feecSPierre Jolivet PetscBool nooffprocentries; 2623aac34f13SBarry Smith 2624aac34f13SBarry Smith PetscFunctionBegin; 262526283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 262626283091SBarry Smith ierr = 
PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 262726283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 262826283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2629e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2630d0f46423SBarry Smith m = B->rmap->n/bs; 2631d0f46423SBarry Smith rstart = B->rmap->rstart/bs; 2632d0f46423SBarry Smith cstart = B->cmap->rstart/bs; 2633d0f46423SBarry Smith cend = B->cmap->rend/bs; 2634b8d659d7SLisandro Dalcin 2635e32f2f54SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2636dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2637aac34f13SBarry Smith for (i=0; i<m; i++) { 2638cf12db73SBarry Smith nz = ii[i+1] - ii[i]; 2639e32f2f54SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2640b8d659d7SLisandro Dalcin nz_max = PetscMax(nz_max,nz); 264137cd3c0dSBarry Smith dlen = 0; 264237cd3c0dSBarry Smith olen = 0; 2643cf12db73SBarry Smith JJ = jj + ii[i]; 2644b8d659d7SLisandro Dalcin for (j=0; j<nz; j++) { 264537cd3c0dSBarry Smith if (*JJ < cstart || *JJ >= cend) olen++; 264637cd3c0dSBarry Smith else dlen++; 2647aac34f13SBarry Smith JJ++; 2648aac34f13SBarry Smith } 264937cd3c0dSBarry Smith d_nnz[i] = dlen; 265037cd3c0dSBarry Smith o_nnz[i] = olen; 2651aac34f13SBarry Smith } 2652aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2653fca92195SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2654aac34f13SBarry Smith 2655b8d659d7SLisandro Dalcin values = (PetscScalar*)V; 2656b8d659d7SLisandro Dalcin if (!values) { 265737cd3c0dSBarry Smith ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2658b8d659d7SLisandro Dalcin } 2659b8d659d7SLisandro Dalcin for (i=0; i<m; i++) { 2660b8d659d7SLisandro Dalcin PetscInt row = i + rstart; 2661cf12db73SBarry Smith PetscInt ncols 
= ii[i+1] - ii[i]; 2662cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 2663bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2664cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2665b8d659d7SLisandro Dalcin ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 26663adadaf3SJed Brown } else { /* block ordering does not match so we can only insert one block at a time. */ 26673adadaf3SJed Brown PetscInt j; 26683adadaf3SJed Brown for (j=0; j<ncols; j++) { 26693adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 26703adadaf3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 26713adadaf3SJed Brown } 26723adadaf3SJed Brown } 2673aac34f13SBarry Smith } 2674aac34f13SBarry Smith 2675b8d659d7SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 26763bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 26773bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 2678aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2679aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 26803bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 26813bd0feecSPierre Jolivet 26827827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2683aac34f13SBarry Smith PetscFunctionReturn(0); 2684aac34f13SBarry Smith } 2685aac34f13SBarry Smith 2686aac34f13SBarry Smith /*@C 2687664954b6SBarry Smith MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2688aac34f13SBarry Smith 2689d083f849SBarry Smith Collective 2690aac34f13SBarry Smith 2691aac34f13SBarry Smith Input Parameters: 26921c4f3114SJed Brown + B - the matrix 
2693dfb205c3SBarry Smith . bs - the block size 2694aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2695aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2696aac34f13SBarry Smith - v - optional values in the matrix 2697aac34f13SBarry Smith 2698664954b6SBarry Smith Level: advanced 2699aac34f13SBarry Smith 270095452b02SPatrick Sanan Notes: 270195452b02SPatrick Sanan The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 27023adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 27033adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 27043adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 27053adadaf3SJed Brown block column and the second index is over columns within a block. 
27063adadaf3SJed Brown 2707664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2708664954b6SBarry Smith 27093adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2710aac34f13SBarry Smith @*/ 27117087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2712aac34f13SBarry Smith { 27134ac538c5SBarry Smith PetscErrorCode ierr; 2714aac34f13SBarry Smith 2715aac34f13SBarry Smith PetscFunctionBegin; 27166ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 27176ba663aaSJed Brown PetscValidType(B,1); 27186ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 27194ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2720aac34f13SBarry Smith PetscFunctionReturn(0); 2721aac34f13SBarry Smith } 2722aac34f13SBarry Smith 2723b2573a8aSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2724a23d5eceSKris Buschelman { 2725a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2726dfbe8321SBarry Smith PetscErrorCode ierr; 2727535b19f3SBarry Smith PetscInt i; 27285d2a9ed1SStefano Zampini PetscMPIInt size; 2729a23d5eceSKris Buschelman 2730a23d5eceSKris Buschelman PetscFunctionBegin; 273133d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 273226283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 273326283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2734e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2735899cda47SBarry Smith 2736a23d5eceSKris Buschelman if 
(d_nnz) { 2737d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2738e32f2f54SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2739a23d5eceSKris Buschelman } 2740a23d5eceSKris Buschelman } 2741a23d5eceSKris Buschelman if (o_nnz) { 2742d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2743e32f2f54SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2744a23d5eceSKris Buschelman } 2745a23d5eceSKris Buschelman } 2746a23d5eceSKris Buschelman 2747a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2748a23d5eceSKris Buschelman b->bs2 = bs*bs; 2749d0f46423SBarry Smith b->mbs = B->rmap->n/bs; 2750d0f46423SBarry Smith b->nbs = B->cmap->n/bs; 2751d0f46423SBarry Smith b->Mbs = B->rmap->N/bs; 2752d0f46423SBarry Smith b->Nbs = B->cmap->N/bs; 2753a23d5eceSKris Buschelman 2754a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2755d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2756a23d5eceSKris Buschelman } 2757d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2758d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2759d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2760d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2761a23d5eceSKris Buschelman 2762cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2763cb7b82ddSBarry Smith ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2764cb7b82ddSBarry Smith #else 2765cb7b82ddSBarry Smith ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2766cb7b82ddSBarry Smith #endif 2767cb7b82ddSBarry Smith ierr = PetscFree(b->garray);CHKERRQ(ierr); 2768cb7b82ddSBarry Smith ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2769cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2770cb7b82ddSBarry Smith 2771cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 
2772ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2773cb7b82ddSBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2774cb7b82ddSBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 27755d2a9ed1SStefano Zampini ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2776cb7b82ddSBarry Smith ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2777cb7b82ddSBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2778cb7b82ddSBarry Smith 2779526dfc15SBarry Smith if (!B->preallocated) { 2780f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2781d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 27829c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 27833bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2784ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2785526dfc15SBarry Smith } 2786a23d5eceSKris Buschelman 2787526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2788526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2789526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2790cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2791cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2792a23d5eceSKris Buschelman PetscFunctionReturn(0); 2793a23d5eceSKris Buschelman } 2794a23d5eceSKris Buschelman 27957087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 27967087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 27975bf65638SKris Buschelman 2798cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 
279982094794SBarry Smith { 280082094794SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 280182094794SBarry Smith PetscErrorCode ierr; 280282094794SBarry Smith Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 280382094794SBarry Smith PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 280482094794SBarry Smith const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 280582094794SBarry Smith 280682094794SBarry Smith PetscFunctionBegin; 2807854ce69bSBarry Smith ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 280882094794SBarry Smith ii[0] = 0; 280982094794SBarry Smith for (i=0; i<M; i++) { 2810e32f2f54SBarry Smith if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2811e32f2f54SBarry Smith if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 281282094794SBarry Smith ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 28135ee9ba1cSJed Brown /* remove one from count of matrix has diagonal */ 28145ee9ba1cSJed Brown for (j=id[i]; j<id[i+1]; j++) { 28155ee9ba1cSJed Brown if (jd[j] == i) {ii[i+1]--;break;} 28165ee9ba1cSJed Brown } 281782094794SBarry Smith } 2818785e854fSJed Brown ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 281982094794SBarry Smith cnt = 0; 282082094794SBarry Smith for (i=0; i<M; i++) { 282182094794SBarry Smith for (j=io[i]; j<io[i+1]; j++) { 282282094794SBarry Smith if (garray[jo[j]] > rstart) break; 282382094794SBarry Smith jj[cnt++] = garray[jo[j]]; 282482094794SBarry Smith } 282582094794SBarry Smith for (k=id[i]; k<id[i+1]; k++) { 28265ee9ba1cSJed Brown if (jd[k] != i) { 282782094794SBarry Smith jj[cnt++] = rstart + jd[k]; 282882094794SBarry Smith } 28295ee9ba1cSJed Brown } 283082094794SBarry Smith for (; j<io[i+1]; j++) { 283182094794SBarry Smith jj[cnt++] = garray[jo[j]]; 283282094794SBarry Smith } 283382094794SBarry Smith } 2834ce94432eSBarry 
Smith ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 283582094794SBarry Smith PetscFunctionReturn(0); 283682094794SBarry Smith } 283782094794SBarry Smith 2838c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 283962471d69SBarry Smith 2840cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2841b2573a8aSBarry Smith 2842cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 284362471d69SBarry Smith { 284462471d69SBarry Smith PetscErrorCode ierr; 284562471d69SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 284662471d69SBarry Smith Mat B; 284785a69837SSatish Balay Mat_MPIAIJ *b; 284862471d69SBarry Smith 284962471d69SBarry Smith PetscFunctionBegin; 2850ce94432eSBarry Smith if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 285162471d69SBarry Smith 28520f6d62edSLisandro Dalcin if (reuse == MAT_REUSE_MATRIX) { 28530f6d62edSLisandro Dalcin B = *newmat; 28540f6d62edSLisandro Dalcin } else { 2855ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 28566d0a4a0eSHong Zhang ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2857f090d951SRémi Lacroix ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2858f090d951SRémi Lacroix ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 28590298fd71SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 28600298fd71SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 28610f6d62edSLisandro Dalcin } 286262471d69SBarry Smith b = (Mat_MPIAIJ*) B->data; 286362471d69SBarry Smith 28640f6d62edSLisandro Dalcin if (reuse == MAT_REUSE_MATRIX) { 28650f6d62edSLisandro Dalcin ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 28660f6d62edSLisandro Dalcin 
ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 28670f6d62edSLisandro Dalcin } else { 28686bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 28696bf464f9SBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2870ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 287162471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 287262471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 28736a719282SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28746a719282SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28750f6d62edSLisandro Dalcin } 28760f6d62edSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28770f6d62edSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 28780f6d62edSLisandro Dalcin 2879511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 288028be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 288162471d69SBarry Smith } else { 288262471d69SBarry Smith *newmat = B; 288362471d69SBarry Smith } 288462471d69SBarry Smith PetscFunctionReturn(0); 288562471d69SBarry Smith } 288662471d69SBarry Smith 28870bad9183SKris Buschelman /*MC 2888fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 28890bad9183SKris Buschelman 28900bad9183SKris Buschelman Options Database Keys: 28918c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 28928c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 2893*6679dcc1SBarry Smith . 
-mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 28948c07d4e3SBarry Smith - -mat_use_hash_table <fact> 28950bad9183SKris Buschelman 28960bad9183SKris Buschelman Level: beginner 28970cd7f59aSBarry Smith 28980cd7f59aSBarry Smith Notes: 28990cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 29000cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 29010bad9183SKris Buschelman 2902fd292e60Sprj- .seealso: MatCreateBAIJ 29030bad9183SKris Buschelman M*/ 29040bad9183SKris Buschelman 2905cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2906c0cdd4a1SDahai Guo 29078cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2908273d9f13SBarry Smith { 2909273d9f13SBarry Smith Mat_MPIBAIJ *b; 2910dfbe8321SBarry Smith PetscErrorCode ierr; 291194ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2912273d9f13SBarry Smith 2913273d9f13SBarry Smith PetscFunctionBegin; 2914b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 291582502324SSatish Balay B->data = (void*)b; 291682502324SSatish Balay 2917273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2918273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2919273d9f13SBarry Smith 2920273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 292155b25c41SPierre Jolivet ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 292255b25c41SPierre Jolivet ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr); 2923273d9f13SBarry Smith 2924273d9f13SBarry Smith /* build local table of row and column ownerships */ 2925854ce69bSBarry Smith ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2926273d9f13SBarry Smith 2927273d9f13SBarry Smith /* build cache for off array entries formed */ 
2928ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 292926fbe8dcSKarl Rupp 2930273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 29310298fd71SBarry Smith b->colmap = NULL; 29320298fd71SBarry Smith b->garray = NULL; 2933273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2934273d9f13SBarry Smith 2935273d9f13SBarry Smith /* stuff used in block assembly */ 2936f4259b30SLisandro Dalcin b->barray = NULL; 2937273d9f13SBarry Smith 2938273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2939f4259b30SLisandro Dalcin b->lvec = NULL; 2940f4259b30SLisandro Dalcin b->Mvctx = NULL; 2941273d9f13SBarry Smith 2942273d9f13SBarry Smith /* stuff for MatGetRow() */ 2943f4259b30SLisandro Dalcin b->rowindices = NULL; 2944f4259b30SLisandro Dalcin b->rowvalues = NULL; 2945273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2946273d9f13SBarry Smith 2947273d9f13SBarry Smith /* hash table stuff */ 2948f4259b30SLisandro Dalcin b->ht = NULL; 2949f4259b30SLisandro Dalcin b->hd = NULL; 2950273d9f13SBarry Smith b->ht_size = 0; 2951273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2952273d9f13SBarry Smith b->ht_fact = 0; 2953273d9f13SBarry Smith b->ht_total_ct = 0; 2954273d9f13SBarry Smith b->ht_insert_ct = 0; 2955273d9f13SBarry Smith 29567dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 29577a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 29587a868f3eSHong Zhang 29598c07d4e3SBarry Smith 2960bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2961bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2962bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 29637ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 29647ea3e4caSstefano_zampini 
ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 29657ea3e4caSstefano_zampini #endif 2966bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 2967bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 2968bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 2969bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 2970bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 2971bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 2972c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 297317667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 297494ae4db5SBarry Smith 297594ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 2976abf3b562SBarry Smith ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 297794ae4db5SBarry Smith if (flg) { 297894ae4db5SBarry Smith PetscReal fact = 1.39; 297994ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 298094ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 298194ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 
298294ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 298394ae4db5SBarry Smith ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 298494ae4db5SBarry Smith } 298594ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 2986273d9f13SBarry Smith PetscFunctionReturn(0); 2987273d9f13SBarry Smith } 2988273d9f13SBarry Smith 2989209238afSKris Buschelman /*MC 2990002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2991209238afSKris Buschelman 2992209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 2993209238afSKris Buschelman and MATMPIBAIJ otherwise. 2994209238afSKris Buschelman 2995209238afSKris Buschelman Options Database Keys: 2996209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 2997209238afSKris Buschelman 2998209238afSKris Buschelman Level: beginner 2999209238afSKris Buschelman 300069b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3001209238afSKris Buschelman M*/ 3002209238afSKris Buschelman 3003273d9f13SBarry Smith /*@C 3004aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3005273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 3006273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 3007273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3008273d9f13SBarry Smith performance can be increased by more than a factor of 50. 3009273d9f13SBarry Smith 3010273d9f13SBarry Smith Collective on Mat 3011273d9f13SBarry Smith 3012273d9f13SBarry Smith Input Parameters: 30131c4f3114SJed Brown + B - the matrix 3014bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. 
One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.
3066273d9f13SBarry Smith 3067273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3068273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 3069273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 3070273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3071273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 3072273d9f13SBarry Smith matrices. 3073273d9f13SBarry Smith 3074aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3075aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3076aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3077aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3078aa95bbe8SBarry Smith 3079273d9f13SBarry Smith Level: intermediate 3080273d9f13SBarry Smith 3081ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3082273d9f13SBarry Smith @*/ 30837087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3084273d9f13SBarry Smith { 30854ac538c5SBarry Smith PetscErrorCode ierr; 3086273d9f13SBarry Smith 3087273d9f13SBarry Smith PetscFunctionBegin; 30886ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 30896ba663aaSJed Brown PetscValidType(B,1); 30906ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 30914ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3092273d9f13SBarry Smith PetscFunctionReturn(0); 3093273d9f13SBarry 
Smith } 3094273d9f13SBarry Smith 309579bdfe76SSatish Balay /*@C 309669b1f4b7SBarry Smith MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 309779bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 309879bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 309979bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 310079bdfe76SSatish Balay performance can be increased by more than a factor of 50. 310179bdfe76SSatish Balay 3102d083f849SBarry Smith Collective 3103db81eaa0SLois Curfman McInnes 310479bdfe76SSatish Balay Input Parameters: 3105db81eaa0SLois Curfman McInnes + comm - MPI communicator 3106bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3107bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 310879bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 310992e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 311092e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 311192e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 311292e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 311392e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 3114be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3115be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 311647a75d0bSBarry Smith . 
d_nz - number of nonzero blocks per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
[MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).
.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
318579bdfe76SSatish Balay 3186027ccd11SLois Curfman McInnes Level: intermediate 3187027ccd11SLois Curfman McInnes 318869b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 318979bdfe76SSatish Balay @*/ 319069b1f4b7SBarry Smith PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 319179bdfe76SSatish Balay { 31926849ba73SBarry Smith PetscErrorCode ierr; 3193b24ad042SBarry Smith PetscMPIInt size; 319479bdfe76SSatish Balay 3195d64ed03dSBarry Smith PetscFunctionBegin; 3196f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3197f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3198ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3199273d9f13SBarry Smith if (size > 1) { 3200273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3201273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3202273d9f13SBarry Smith } else { 3203273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3204273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 32053914022bSBarry Smith } 32063a40ed3dSBarry Smith PetscFunctionReturn(0); 320779bdfe76SSatish Balay } 3208026e39d0SSatish Balay 32096849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 32100ac07820SSatish Balay { 32110ac07820SSatish Balay Mat mat; 32120ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3213dfbe8321SBarry Smith PetscErrorCode ierr; 3214b24ad042SBarry Smith PetscInt len=0; 32150ac07820SSatish Balay 3216d64ed03dSBarry Smith PetscFunctionBegin; 3217f4259b30SLisandro Dalcin *newmat = NULL; 3218ce94432eSBarry Smith ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3219d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 32207adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 32217fff6886SHong Zhang 3222d5f3da31SBarry Smith mat->factortype = matin->factortype; 3223273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 32240ac07820SSatish Balay mat->assembled = PETSC_TRUE; 32257fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 32267fff6886SHong Zhang 3227273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 3228d0f46423SBarry Smith mat->rmap->bs = matin->rmap->bs; 32290ac07820SSatish Balay a->bs2 = oldmat->bs2; 32300ac07820SSatish Balay a->mbs = oldmat->mbs; 32310ac07820SSatish Balay a->nbs = oldmat->nbs; 32320ac07820SSatish Balay a->Mbs = oldmat->Mbs; 32330ac07820SSatish Balay a->Nbs = oldmat->Nbs; 32340ac07820SSatish Balay 32351e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 32361e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3237899cda47SBarry Smith 32380ac07820SSatish Balay a->size = oldmat->size; 32390ac07820SSatish Balay a->rank = oldmat->rank; 3240aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 3241aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 3242f4259b30SLisandro Dalcin a->rowindices = NULL; 3243f4259b30SLisandro Dalcin a->rowvalues = NULL; 32440ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 3245f4259b30SLisandro Dalcin a->barray = NULL; 3246899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 3247899cda47SBarry Smith a->rendbs = oldmat->rendbs; 3248899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 3249899cda47SBarry Smith a->cendbs = oldmat->cendbs; 32500ac07820SSatish Balay 3251133cdb44SSatish Balay /* hash table stuff */ 3252f4259b30SLisandro Dalcin a->ht = NULL; 3253f4259b30SLisandro Dalcin a->hd = NULL; 3254133cdb44SSatish Balay 
a->ht_size = 0; 3255133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 325625fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 3257133cdb44SSatish Balay a->ht_total_ct = 0; 3258133cdb44SSatish Balay a->ht_insert_ct = 0; 3259133cdb44SSatish Balay 3260580bdb30SBarry Smith ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 32610ac07820SSatish Balay if (oldmat->colmap) { 3262aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 32630f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 326448e59246SSatish Balay #else 3265854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 32663bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3267580bdb30SBarry Smith ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 326848e59246SSatish Balay #endif 3269f4259b30SLisandro Dalcin } else a->colmap = NULL; 32704beb1cfeSHong Zhang 32710ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3272785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 32733bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3274580bdb30SBarry Smith ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3275f4259b30SLisandro Dalcin } else a->garray = NULL; 32760ac07820SSatish Balay 3277ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 32780ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 32793bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 32800ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 32813bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 32827fff6886SHong Zhang 32832e8a6d31SBarry Smith ierr 
= MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 32843bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 32852e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 32863bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3287140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 32880ac07820SSatish Balay *newmat = mat; 32893a40ed3dSBarry Smith PetscFunctionReturn(0); 32900ac07820SSatish Balay } 329157b952d6SSatish Balay 3292618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3293b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3294b51a4376SLisandro Dalcin { 3295b51a4376SLisandro Dalcin PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3296b51a4376SLisandro Dalcin PetscInt *rowidxs,*colidxs,rs,cs,ce; 3297b51a4376SLisandro Dalcin PetscScalar *matvals; 3298b51a4376SLisandro Dalcin PetscErrorCode ierr; 3299b51a4376SLisandro Dalcin 3300b51a4376SLisandro Dalcin PetscFunctionBegin; 3301b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3302b51a4376SLisandro Dalcin 3303b51a4376SLisandro Dalcin /* read in matrix header */ 3304b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 3305b51a4376SLisandro Dalcin if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3306b51a4376SLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 3307b51a4376SLisandro Dalcin if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 3308b51a4376SLisandro Dalcin if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N); 
3309b51a4376SLisandro Dalcin if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3310b51a4376SLisandro Dalcin 3311b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 3312b51a4376SLisandro Dalcin ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3313618cc2edSLisandro Dalcin /* set local sizes if not set already */ 3314618cc2edSLisandro Dalcin if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3315618cc2edSLisandro Dalcin if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3316b51a4376SLisandro Dalcin /* set global sizes if not set already */ 3317b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3318b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 3319b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3320b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3321b51a4376SLisandro Dalcin 3322b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 3323b51a4376SLisandro Dalcin ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 3324b51a4376SLisandro Dalcin if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 3325b51a4376SLisandro Dalcin ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3326b51a4376SLisandro Dalcin ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 3327b51a4376SLisandro Dalcin ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL); 3328b51a4376SLisandro Dalcin ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce); 3329b51a4376SLisandro Dalcin mbs = m/bs; nbs = n/bs; 3330b51a4376SLisandro Dalcin 3331b51a4376SLisandro Dalcin /* read in row lengths and build row indices */ 3332b51a4376SLisandro Dalcin ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3333b51a4376SLisandro Dalcin ierr = 
PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3334b51a4376SLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3335b51a4376SLisandro Dalcin ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 3336b51a4376SLisandro Dalcin if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 3337b51a4376SLisandro Dalcin 3338b51a4376SLisandro Dalcin /* read in column indices and matrix values */ 3339b51a4376SLisandro Dalcin ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3340b51a4376SLisandro Dalcin ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3341b51a4376SLisandro Dalcin ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3342b51a4376SLisandro Dalcin 3343b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3344b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3345b51a4376SLisandro Dalcin PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3346618cc2edSLisandro Dalcin PetscBool sbaij,done; 3347b51a4376SLisandro Dalcin PetscInt *d_nnz,*o_nnz; 3348b51a4376SLisandro Dalcin 3349b51a4376SLisandro Dalcin ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3350b51a4376SLisandro Dalcin ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3351b51a4376SLisandro Dalcin ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3352618cc2edSLisandro Dalcin ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3353b51a4376SLisandro Dalcin for (i=0; i<mbs; i++) { 3354b51a4376SLisandro Dalcin ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3355b51a4376SLisandro Dalcin ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3356618cc2edSLisandro 
Dalcin for (k=0; k<bs; k++) { 3357618cc2edSLisandro Dalcin PetscInt row = bs*i + k; 3358618cc2edSLisandro Dalcin for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3359618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3360618cc2edSLisandro Dalcin if (!sbaij || col >= row) { 3361618cc2edSLisandro Dalcin if (col >= cs && col < ce) { 3362618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3363b51a4376SLisandro Dalcin } else { 3364618cc2edSLisandro Dalcin ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3365b51a4376SLisandro Dalcin if (done) o_nnz[i]++; 3366b51a4376SLisandro Dalcin } 3367b51a4376SLisandro Dalcin } 3368618cc2edSLisandro Dalcin } 3369618cc2edSLisandro Dalcin } 3370618cc2edSLisandro Dalcin } 3371b51a4376SLisandro Dalcin ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3372b51a4376SLisandro Dalcin ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3373b51a4376SLisandro Dalcin ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3374618cc2edSLisandro Dalcin ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3375b51a4376SLisandro Dalcin ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3376b51a4376SLisandro Dalcin } 3377b51a4376SLisandro Dalcin 3378b51a4376SLisandro Dalcin /* store matrix values */ 3379b51a4376SLisandro Dalcin for (i=0; i<m; i++) { 3380b51a4376SLisandro Dalcin PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3381618cc2edSLisandro Dalcin ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3382b51a4376SLisandro Dalcin } 3383b51a4376SLisandro Dalcin 3384b51a4376SLisandro Dalcin ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3385b51a4376SLisandro Dalcin ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3386b51a4376SLisandro Dalcin ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3387b51a4376SLisandro Dalcin ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3388b51a4376SLisandro Dalcin PetscFunctionReturn(0); 
3389b51a4376SLisandro Dalcin } 3390b51a4376SLisandro Dalcin 3391b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 33924683f7a4SShri Abhyankar { 33934683f7a4SShri Abhyankar PetscErrorCode ierr; 33947f489da9SVaclav Hapla PetscBool isbinary; 33954683f7a4SShri Abhyankar 33964683f7a4SShri Abhyankar PetscFunctionBegin; 33977f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 3398b51a4376SLisandro Dalcin if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3399b51a4376SLisandro Dalcin ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 34004683f7a4SShri Abhyankar PetscFunctionReturn(0); 34014683f7a4SShri Abhyankar } 34024683f7a4SShri Abhyankar 3403133cdb44SSatish Balay /*@ 3404133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3405133cdb44SSatish Balay 3406133cdb44SSatish Balay Input Parameters: 3407a2b725a8SWilliam Gropp + mat - the matrix 3408a2b725a8SWilliam Gropp - fact - factor 3409133cdb44SSatish Balay 3410c5eb9154SBarry Smith Not Collective, each process can use a different factor 3411fee21e36SBarry Smith 34128c890885SBarry Smith Level: advanced 34138c890885SBarry Smith 3414133cdb44SSatish Balay Notes: 34158c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 3416133cdb44SSatish Balay 3417133cdb44SSatish Balay .seealso: MatSetOption() 3418133cdb44SSatish Balay @*/ 34197087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3420133cdb44SSatish Balay { 34214ac538c5SBarry Smith PetscErrorCode ierr; 34225bf65638SKris Buschelman 34235bf65638SKris Buschelman PetscFunctionBegin; 34244ac538c5SBarry Smith ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 34255bf65638SKris Buschelman PetscFunctionReturn(0); 34265bf65638SKris Buschelman } 34275bf65638SKris Buschelman 34287087cfbeSBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 34295bf65638SKris Buschelman { 343025fdafccSSatish Balay Mat_MPIBAIJ *baij; 3431133cdb44SSatish Balay 3432133cdb44SSatish Balay PetscFunctionBegin; 3433133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 3434133cdb44SSatish Balay baij->ht_fact = fact; 3435133cdb44SSatish Balay PetscFunctionReturn(0); 3436133cdb44SSatish Balay } 3437f2a5309cSSatish Balay 34389230625dSJed Brown PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3439f2a5309cSSatish Balay { 3440f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3441ab4d48faSStefano Zampini PetscBool flg; 3442ab4d48faSStefano Zampini PetscErrorCode ierr; 34435fd66863SKarl Rupp 3444f2a5309cSSatish Balay PetscFunctionBegin; 3445ab4d48faSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 
3446ab4d48faSStefano Zampini if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 344721e72a00SBarry Smith if (Ad) *Ad = a->A; 344821e72a00SBarry Smith if (Ao) *Ao = a->B; 344921e72a00SBarry Smith if (colmap) *colmap = a->garray; 3450f2a5309cSSatish Balay PetscFunctionReturn(0); 3451f2a5309cSSatish Balay } 345285535b8eSBarry Smith 345385535b8eSBarry Smith /* 345485535b8eSBarry Smith Special version for direct calls from Fortran (to eliminate two function call overheads 345585535b8eSBarry Smith */ 345685535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 345785535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 345885535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 345985535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 346085535b8eSBarry Smith #endif 346185535b8eSBarry Smith 346285535b8eSBarry Smith /*@C 346385535b8eSBarry Smith MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 346485535b8eSBarry Smith 346585535b8eSBarry Smith Collective on Mat 346685535b8eSBarry Smith 346785535b8eSBarry Smith Input Parameters: 346885535b8eSBarry Smith + mat - the matrix 346985535b8eSBarry Smith . min - number of input rows 347085535b8eSBarry Smith . im - input rows 347185535b8eSBarry Smith . nin - number of input columns 347285535b8eSBarry Smith . in - input columns 347385535b8eSBarry Smith . v - numerical values input 347485535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES 347585535b8eSBarry Smith 347695452b02SPatrick Sanan Notes: 347795452b02SPatrick Sanan This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
347885535b8eSBarry Smith 347985535b8eSBarry Smith Level: advanced 348085535b8eSBarry Smith 348185535b8eSBarry Smith .seealso: MatSetValuesBlocked() 348285535b8eSBarry Smith @*/ 348385535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 348485535b8eSBarry Smith { 348585535b8eSBarry Smith /* convert input arguments to C version */ 348685535b8eSBarry Smith Mat mat = *matin; 348785535b8eSBarry Smith PetscInt m = *min, n = *nin; 348885535b8eSBarry Smith InsertMode addv = *addvin; 348985535b8eSBarry Smith 349085535b8eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 349185535b8eSBarry Smith const MatScalar *value; 349285535b8eSBarry Smith MatScalar *barray = baij->barray; 3493ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 349485535b8eSBarry Smith PetscErrorCode ierr; 349585535b8eSBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 349685535b8eSBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3497d0f46423SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 349885535b8eSBarry Smith 349985535b8eSBarry Smith PetscFunctionBegin; 350085535b8eSBarry Smith /* tasks normally handled by MatSetValuesBlocked() */ 350126fbe8dcSKarl Rupp if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 350276bd3646SJed Brown else if (PetscUnlikely(mat->insertmode != addv)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 350376bd3646SJed Brown if (PetscUnlikely(mat->factortype)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 350485535b8eSBarry Smith if (mat->assembled) { 350585535b8eSBarry Smith mat->was_assembled = PETSC_TRUE; 350685535b8eSBarry Smith mat->assembled = PETSC_FALSE; 350785535b8eSBarry Smith } 350885535b8eSBarry Smith ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 350985535b8eSBarry 
Smith 351085535b8eSBarry Smith 351185535b8eSBarry Smith if (!barray) { 3512785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 351385535b8eSBarry Smith baij->barray = barray; 351485535b8eSBarry Smith } 351585535b8eSBarry Smith 351626fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 351726fbe8dcSKarl Rupp else stepval = (m-1)*bs; 351826fbe8dcSKarl Rupp 351985535b8eSBarry Smith for (i=0; i<m; i++) { 352085535b8eSBarry Smith if (im[i] < 0) continue; 3521cf9c20a2SJed Brown if (PetscUnlikelyDebug(im[i] >= baij->Mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 352285535b8eSBarry Smith if (im[i] >= rstart && im[i] < rend) { 352385535b8eSBarry Smith row = im[i] - rstart; 352485535b8eSBarry Smith for (j=0; j<n; j++) { 352585535b8eSBarry Smith /* If NumCol = 1 then a copy is not required */ 352685535b8eSBarry Smith if ((roworiented) && (n == 1)) { 352785535b8eSBarry Smith barray = (MatScalar*)v + i*bs2; 352885535b8eSBarry Smith } else if ((!roworiented) && (m == 1)) { 352985535b8eSBarry Smith barray = (MatScalar*)v + j*bs2; 353085535b8eSBarry Smith } else { /* Here a copy is required */ 353185535b8eSBarry Smith if (roworiented) { 353285535b8eSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 353385535b8eSBarry Smith } else { 353485535b8eSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 353585535b8eSBarry Smith } 353685535b8eSBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 353785535b8eSBarry Smith for (jj=0; jj<bs; jj++) { 353885535b8eSBarry Smith *barray++ = *value++; 353985535b8eSBarry Smith } 354085535b8eSBarry Smith } 354185535b8eSBarry Smith barray -=bs2; 354285535b8eSBarry Smith } 354385535b8eSBarry Smith 354485535b8eSBarry Smith if (in[j] >= cstart && in[j] < cend) { 354585535b8eSBarry Smith col = in[j] - cstart; 35468ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 354726fbe8dcSKarl Rupp } else if (in[j] < 0) 
continue; 3548cf9c20a2SJed Brown else if (PetscUnlikelyDebug(in[j] >= baij->Nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 354985535b8eSBarry Smith else { 355085535b8eSBarry Smith if (mat->was_assembled) { 355185535b8eSBarry Smith if (!baij->colmap) { 3552ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 355385535b8eSBarry Smith } 355485535b8eSBarry Smith 355585535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 355685535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 355785535b8eSBarry Smith { PetscInt data; 355885535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3559e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 356085535b8eSBarry Smith } 356185535b8eSBarry Smith #else 3562e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 356385535b8eSBarry Smith #endif 356485535b8eSBarry Smith #endif 356585535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 356685535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 356785535b8eSBarry Smith col = (col - 1)/bs; 356885535b8eSBarry Smith #else 356985535b8eSBarry Smith col = (baij->colmap[in[j]] - 1)/bs; 357085535b8eSBarry Smith #endif 357185535b8eSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3572ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 357385535b8eSBarry Smith col = in[j]; 357485535b8eSBarry Smith } 357526fbe8dcSKarl Rupp } else col = in[j]; 35768ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 357785535b8eSBarry Smith } 357885535b8eSBarry Smith } 357985535b8eSBarry Smith } else { 358085535b8eSBarry Smith if (!baij->donotstash) { 358185535b8eSBarry Smith if (roworiented) { 358285535b8eSBarry Smith ierr = 
MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 358385535b8eSBarry Smith } else { 358485535b8eSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 358585535b8eSBarry Smith } 358685535b8eSBarry Smith } 358785535b8eSBarry Smith } 358885535b8eSBarry Smith } 358985535b8eSBarry Smith 359085535b8eSBarry Smith /* task normally handled by MatSetValuesBlocked() */ 359185535b8eSBarry Smith ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 359285535b8eSBarry Smith PetscFunctionReturn(0); 359385535b8eSBarry Smith } 3594dfb205c3SBarry Smith 3595dfb205c3SBarry Smith /*@ 3596483a2f95SBarry Smith MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3597dfb205c3SBarry Smith CSR format the local rows. 3598dfb205c3SBarry Smith 3599d083f849SBarry Smith Collective 3600dfb205c3SBarry Smith 3601dfb205c3SBarry Smith Input Parameters: 3602dfb205c3SBarry Smith + comm - MPI communicator 3603dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 3604dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 3605dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 3606dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3607dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 3608dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3609dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3610483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3611dfb205c3SBarry Smith . 
j - column indices 3612dfb205c3SBarry Smith - a - matrix values 3613dfb205c3SBarry Smith 3614dfb205c3SBarry Smith Output Parameter: 3615dfb205c3SBarry Smith . mat - the matrix 3616dfb205c3SBarry Smith 3617dfb205c3SBarry Smith Level: intermediate 3618dfb205c3SBarry Smith 3619dfb205c3SBarry Smith Notes: 3620dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3621dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 3622dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3623dfb205c3SBarry Smith 36243adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 36253adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 36263adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 36273adadaf3SJed Brown with column-major ordering within blocks. 36283adadaf3SJed Brown 3629dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3630dfb205c3SBarry Smith 3631dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 363269b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3633dfb205c3SBarry Smith @*/ 36347087cfbeSBarry Smith PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3635dfb205c3SBarry Smith { 3636dfb205c3SBarry Smith PetscErrorCode ierr; 3637dfb205c3SBarry Smith 3638dfb205c3SBarry Smith PetscFunctionBegin; 3639f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3640dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3641dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3642dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 36439a43d2d5SJed Brown ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 364427f91139SJed Brown ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 364527f91139SJed Brown ierr = MatSetUp(*mat);CHKERRQ(ierr); 3646d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3647dfb205c3SBarry Smith ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3648d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3649dfb205c3SBarry Smith PetscFunctionReturn(0); 3650dfb205c3SBarry Smith } 3651e561ad89SHong Zhang 3652bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3653e561ad89SHong Zhang { 3654e561ad89SHong Zhang PetscErrorCode ierr; 3655bd153df0SHong Zhang PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3656bd153df0SHong Zhang PetscInt *indx; 3657bd153df0SHong Zhang PetscScalar *values; 3658e561ad89SHong 
Zhang 3659e561ad89SHong Zhang PetscFunctionBegin; 3660e561ad89SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3661bd153df0SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3662bd153df0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 36632c6ba4edSHong Zhang PetscInt *dnz,*onz,mbs,Nbs,nbs; 3664bd153df0SHong Zhang PetscInt *bindx,rmax=a->rmax,j; 366577f764caSHong Zhang PetscMPIInt rank,size; 3666e561ad89SHong Zhang 3667bd153df0SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3668bd153df0SHong Zhang mbs = m/bs; Nbs = N/cbs; 3669bd153df0SHong Zhang if (n == PETSC_DECIDE) { 3670da91a574SPierre Jolivet ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N); 3671bd153df0SHong Zhang } 3672da91a574SPierre Jolivet nbs = n/cbs; 3673e561ad89SHong Zhang 3674647a6520SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 367577f764caSHong Zhang ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 367677f764caSHong Zhang 3677ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3678ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr); 367977f764caSHong Zhang if (rank == size-1) { 368077f764caSHong Zhang /* Check sum(nbs) = Nbs */ 36812c6ba4edSHong Zhang if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs); 368277f764caSHong Zhang } 368377f764caSHong Zhang 368477f764caSHong Zhang rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3685bd153df0SHong Zhang for (i=0; i<mbs; i++) { 3686647a6520SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3687647a6520SHong Zhang nnz = nnz/bs; 3688647a6520SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3689647a6520SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 
3690647a6520SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3691e561ad89SHong Zhang } 3692647a6520SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 3693e561ad89SHong Zhang 3694e561ad89SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 369577f764caSHong Zhang ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3696e561ad89SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 36978761c3d6SHong Zhang ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 36988761c3d6SHong Zhang ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3699e561ad89SHong Zhang ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3700e561ad89SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3701f2e2784eSPierre Jolivet ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3702e561ad89SHong Zhang } 3703e561ad89SHong Zhang 3704bd153df0SHong Zhang /* numeric phase */ 3705647a6520SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3706bd153df0SHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3707e561ad89SHong Zhang 3708e561ad89SHong Zhang for (i=0; i<m; i++) { 3709e561ad89SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3710e561ad89SHong Zhang Ii = i + rstart; 3711bd153df0SHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3712e561ad89SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3713e561ad89SHong Zhang } 3714bd153df0SHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3715bd153df0SHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3716e561ad89SHong Zhang PetscFunctionReturn(0); 3717e561ad89SHong Zhang } 3718