1c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2c5d9258eSSatish Balay 3b51a4376SLisandro Dalcin #include <petsc/private/hashseti.h> 4c6db04a5SJed Brown #include <petscblaslapack.h> 565a92638SMatthew G. Knepley #include <petscsf.h> 679bdfe76SSatish Balay 77ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 87ea3e4caSstefano_zampini PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 97ea3e4caSstefano_zampini #endif 107ea3e4caSstefano_zampini 11985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 127843d17aSBarry Smith { 137843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 14dfbe8321SBarry Smith PetscErrorCode ierr; 154e879edeSHong Zhang PetscInt i,*idxb = NULL,m = A->rmap->n,bs = A->cmap->bs; 164e879edeSHong Zhang PetscScalar *va,*vv; 174e879edeSHong Zhang Vec vB,vA; 184e879edeSHong Zhang const PetscScalar *vb; 197843d17aSBarry Smith 207843d17aSBarry Smith PetscFunctionBegin; 214e879edeSHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr); 224e879edeSHong Zhang ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr); 234e879edeSHong Zhang 244e879edeSHong Zhang ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr); 25985db425SBarry Smith if (idx) { 264e879edeSHong Zhang for (i=0; i<m; i++) { 2726fbe8dcSKarl Rupp if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2826fbe8dcSKarl Rupp } 29985db425SBarry Smith } 307843d17aSBarry Smith 314e879edeSHong Zhang ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr); 3243359b5eSHong Zhang ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr); 334e879edeSHong Zhang ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr); 347843d17aSBarry Smith 354e879edeSHong Zhang ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr); 364e879edeSHong Zhang ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr); 374e879edeSHong Zhang for (i=0; i<m; i++) { 3826fbe8dcSKarl Rupp if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 394e879edeSHong Zhang vv[i] = 
vb[i]; 404e879edeSHong Zhang if (idx) idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 414e879edeSHong Zhang } else { 424e879edeSHong Zhang vv[i] = va[i]; 4343359b5eSHong Zhang if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs*a->garray[idxb[i]/bs] + (idxb[i] % bs)) 444e879edeSHong Zhang idx[i] = bs*a->garray[idxb[i]/bs] + (idxb[i] % bs); 4526fbe8dcSKarl Rupp } 467843d17aSBarry Smith } 474e879edeSHong Zhang ierr = VecRestoreArrayWrite(vA,&vv);CHKERRQ(ierr); 484e879edeSHong Zhang ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr); 494e879edeSHong Zhang ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr); 50c31cb41cSBarry Smith ierr = PetscFree(idxb);CHKERRQ(ierr); 514e879edeSHong Zhang ierr = VecDestroy(&vA);CHKERRQ(ierr); 524e879edeSHong Zhang ierr = VecDestroy(&vB);CHKERRQ(ierr); 537843d17aSBarry Smith PetscFunctionReturn(0); 547843d17aSBarry Smith } 557843d17aSBarry Smith 567087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 577fc3c18eSBarry Smith { 587fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 59dfbe8321SBarry Smith PetscErrorCode ierr; 607fc3c18eSBarry Smith 617fc3c18eSBarry Smith PetscFunctionBegin; 627fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 637fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 647fc3c18eSBarry Smith PetscFunctionReturn(0); 657fc3c18eSBarry Smith } 667fc3c18eSBarry Smith 677087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 687fc3c18eSBarry Smith { 697fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 70dfbe8321SBarry Smith PetscErrorCode ierr; 717fc3c18eSBarry Smith 727fc3c18eSBarry Smith PetscFunctionBegin; 737fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 747fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 757fc3c18eSBarry Smith PetscFunctionReturn(0); 767fc3c18eSBarry Smith } 777fc3c18eSBarry Smith 78537820f0SBarry Smith /* 79537820f0SBarry Smith Local 
utility routine that creates a mapping from the global column 8057b952d6SSatish Balay number to the local number in the off-diagonal part of the local 81e06f6af7SJed Brown storage of the matrix. This is done in a non scalable way since the 8257b952d6SSatish Balay length of colmap equals the global matrix length. 8357b952d6SSatish Balay */ 84ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 8557b952d6SSatish Balay { 8657b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8757b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 886849ba73SBarry Smith PetscErrorCode ierr; 89d0f46423SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 9057b952d6SSatish Balay 91d64ed03dSBarry Smith PetscFunctionBegin; 92aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 93e23dfa41SBarry Smith ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 9448e59246SSatish Balay for (i=0; i<nbs; i++) { 953861aac3SJed Brown ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 9648e59246SSatish Balay } 9748e59246SSatish Balay #else 98580bdb30SBarry Smith ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 993bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 100928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 10148e59246SSatish Balay #endif 1023a40ed3dSBarry Smith PetscFunctionReturn(0); 10357b952d6SSatish Balay } 10457b952d6SSatish Balay 105d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol) \ 10680c1aa95SSatish Balay { \ 10780c1aa95SSatish Balay brow = row/bs; \ 10880c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 109ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 11080c1aa95SSatish Balay bcol = col/bs; \ 11180c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 112ab26458aSBarry Smith low = 0; high = nrow; \ 
113ab26458aSBarry Smith while (high-low > 3) { \ 114ab26458aSBarry Smith t = (low+high)/2; \ 115ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 116ab26458aSBarry Smith else low = t; \ 117ab26458aSBarry Smith } \ 118ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11980c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 12080c1aa95SSatish Balay if (rp[_i] == bcol) { \ 12180c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 122eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 123eada6651SSatish Balay else *bap = value; \ 124ac7a638eSSatish Balay goto a_noinsert; \ 12580c1aa95SSatish Balay } \ 12680c1aa95SSatish Balay } \ 12789280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 1289ace16cdSJacob Faibussowitsch PetscAssertFalse(a->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 129fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 13080c1aa95SSatish Balay N = nrow++ - 1; \ 13180c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 132580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 133580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \ 134580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 13580c1aa95SSatish Balay rp[_i] = bcol; \ 13680c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 137ac7a638eSSatish Balay a_noinsert:; \ 13880c1aa95SSatish Balay ailen[brow] = nrow; \ 13980c1aa95SSatish Balay } 14057b952d6SSatish Balay 141d40312a9SBarry Smith #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol) \ 142ac7a638eSSatish Balay { \ 143ac7a638eSSatish Balay brow = row/bs; \ 144ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 145ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 
146ac7a638eSSatish Balay bcol = col/bs; \ 147ac7a638eSSatish Balay ridx = row % bs; cidx = col % bs; \ 148ac7a638eSSatish Balay low = 0; high = nrow; \ 149ac7a638eSSatish Balay while (high-low > 3) { \ 150ac7a638eSSatish Balay t = (low+high)/2; \ 151ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 152ac7a638eSSatish Balay else low = t; \ 153ac7a638eSSatish Balay } \ 154ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 155ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 156ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 157ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 158ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 159ac7a638eSSatish Balay else *bap = value; \ 160ac7a638eSSatish Balay goto b_noinsert; \ 161ac7a638eSSatish Balay } \ 162ac7a638eSSatish Balay } \ 16389280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 1649ace16cdSJacob Faibussowitsch PetscAssertFalse(b->nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 165fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 166ac7a638eSSatish Balay N = nrow++ - 1; \ 167ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 168580bdb30SBarry Smith ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\ 169580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\ 170580bdb30SBarry Smith ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr); \ 171ac7a638eSSatish Balay rp[_i] = bcol; \ 172ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 173ac7a638eSSatish Balay b_noinsert:; \ 174ac7a638eSSatish Balay bilen[brow] = nrow; \ 175ac7a638eSSatish Balay } 176ac7a638eSSatish Balay 177b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar 
v[],InsertMode addv) 17857b952d6SSatish Balay { 17957b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 18093fea6afSBarry Smith MatScalar value; 181ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 182dfbe8321SBarry Smith PetscErrorCode ierr; 183b24ad042SBarry Smith PetscInt i,j,row,col; 184d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 185d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 186d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 18757b952d6SSatish Balay 188eada6651SSatish Balay /* Some Variables required in the macro */ 18980c1aa95SSatish Balay Mat A = baij->A; 19080c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 191b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 1923eda8832SBarry Smith MatScalar *aa =a->a; 193ac7a638eSSatish Balay 194ac7a638eSSatish Balay Mat B = baij->B; 195ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 196b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 1973eda8832SBarry Smith MatScalar *ba =b->a; 198ac7a638eSSatish Balay 199b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 200b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 2013eda8832SBarry Smith MatScalar *ap,*bap; 20280c1aa95SSatish Balay 203d64ed03dSBarry Smith PetscFunctionBegin; 20457b952d6SSatish Balay for (i=0; i<m; i++) { 2055ef9f2a5SBarry Smith if (im[i] < 0) continue; 2069ace16cdSJacob Faibussowitsch PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 20757b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 20857b952d6SSatish Balay row = im[i] - rstart_orig; 20957b952d6SSatish Balay for (j=0; j<n; j++) { 21057b952d6SSatish Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { 21157b952d6SSatish Balay col = in[j] - cstart_orig; 
212db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 213db4deed7SKarl Rupp else value = v[i+j*m]; 214d40312a9SBarry Smith MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]); 21573959e64SBarry Smith } else if (in[j] < 0) continue; 2169ace16cdSJacob Faibussowitsch else PetscAssertFalse(in[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],mat->cmap->N-1); 2179245e749SBarry Smith else { 21857b952d6SSatish Balay if (mat->was_assembled) { 219905e6a2fSBarry Smith if (!baij->colmap) { 220ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 221905e6a2fSBarry Smith } 222aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2230f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 224bba1ac68SSatish Balay col = col - 1; 22548e59246SSatish Balay #else 226bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 22748e59246SSatish Balay #endif 228c9ef50b2SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 229ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 2308295de27SSatish Balay col = in[j]; 2319bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2329bf004c3SSatish Balay B = baij->B; 2339bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 2349bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2359bf004c3SSatish Balay ba =b->a; 2369ace16cdSJacob Faibussowitsch } else PetscAssertFalse(col < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 237c9ef50b2SBarry Smith else col += in[j]%bs; 2388295de27SSatish Balay } else col = in[j]; 239db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 240db4deed7SKarl Rupp else value = v[i+j*m]; 241d40312a9SBarry Smith MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]); 24290da58bdSSatish Balay /* ierr 
= MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 24357b952d6SSatish Balay } 24457b952d6SSatish Balay } 245d64ed03dSBarry Smith } else { 2469ace16cdSJacob Faibussowitsch PetscAssertFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 24790f02eecSBarry Smith if (!baij->donotstash) { 2485080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 249ff2fd236SBarry Smith if (roworiented) { 250b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 251ff2fd236SBarry Smith } else { 252b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 25357b952d6SSatish Balay } 25457b952d6SSatish Balay } 25557b952d6SSatish Balay } 25690f02eecSBarry Smith } 2573a40ed3dSBarry Smith PetscFunctionReturn(0); 25857b952d6SSatish Balay } 25957b952d6SSatish Balay 260*9fbee547SJacob Faibussowitsch static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol) 261880c6e6aSBarry Smith { 262880c6e6aSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2638ab52850SBarry Smith PetscInt *rp,low,high,t,ii,jj,nrow,i,rmax,N; 264880c6e6aSBarry Smith PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 265880c6e6aSBarry Smith PetscErrorCode ierr; 2668ab52850SBarry Smith PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs; 267880c6e6aSBarry Smith PetscBool roworiented=a->roworiented; 268880c6e6aSBarry Smith const PetscScalar *value = v; 269880c6e6aSBarry Smith MatScalar *ap,*aa = a->a,*bap; 270880c6e6aSBarry Smith 271880c6e6aSBarry Smith PetscFunctionBegin; 272880c6e6aSBarry Smith rp = aj + ai[row]; 273880c6e6aSBarry Smith ap = aa + bs2*ai[row]; 274880c6e6aSBarry Smith rmax = imax[row]; 275880c6e6aSBarry Smith nrow = ailen[row]; 
2768ab52850SBarry Smith value = v; 2778ab52850SBarry Smith low = 0; 2788ab52850SBarry Smith high = nrow; 279880c6e6aSBarry Smith while (high-low > 7) { 280880c6e6aSBarry Smith t = (low+high)/2; 281880c6e6aSBarry Smith if (rp[t] > col) high = t; 282880c6e6aSBarry Smith else low = t; 283880c6e6aSBarry Smith } 284880c6e6aSBarry Smith for (i=low; i<high; i++) { 285880c6e6aSBarry Smith if (rp[i] > col) break; 286880c6e6aSBarry Smith if (rp[i] == col) { 287880c6e6aSBarry Smith bap = ap + bs2*i; 288880c6e6aSBarry Smith if (roworiented) { 289880c6e6aSBarry Smith if (is == ADD_VALUES) { 2908ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 291880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 292880c6e6aSBarry Smith bap[jj] += *value++; 293880c6e6aSBarry Smith } 294880c6e6aSBarry Smith } 295880c6e6aSBarry Smith } else { 2968ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 297880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 298880c6e6aSBarry Smith bap[jj] = *value++; 299880c6e6aSBarry Smith } 300880c6e6aSBarry Smith } 301880c6e6aSBarry Smith } 302880c6e6aSBarry Smith } else { 303880c6e6aSBarry Smith if (is == ADD_VALUES) { 3048ab52850SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 305880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 306880c6e6aSBarry Smith bap[jj] += value[jj]; 307880c6e6aSBarry Smith } 308880c6e6aSBarry Smith bap += bs; 309880c6e6aSBarry Smith } 310880c6e6aSBarry Smith } else { 3118ab52850SBarry Smith for (ii=0; ii<bs; ii++,value+=bs) { 312880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 313880c6e6aSBarry Smith bap[jj] = value[jj]; 314880c6e6aSBarry Smith } 315880c6e6aSBarry Smith bap += bs; 316880c6e6aSBarry Smith } 317880c6e6aSBarry Smith } 318880c6e6aSBarry Smith } 319880c6e6aSBarry Smith goto noinsert2; 320880c6e6aSBarry Smith } 321880c6e6aSBarry Smith } 322880c6e6aSBarry Smith if (nonew == 1) goto noinsert2; 3239ace16cdSJacob Faibussowitsch PetscAssertFalse(nonew == -1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block 
(%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 324880c6e6aSBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 325880c6e6aSBarry Smith N = nrow++ - 1; high++; 326880c6e6aSBarry Smith /* shift up all the later entries in this row */ 327580bdb30SBarry Smith ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr); 328580bdb30SBarry Smith ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr); 329880c6e6aSBarry Smith rp[i] = col; 330880c6e6aSBarry Smith bap = ap + bs2*i; 331880c6e6aSBarry Smith if (roworiented) { 3328ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 333880c6e6aSBarry Smith for (jj=ii; jj<bs2; jj+=bs) { 334880c6e6aSBarry Smith bap[jj] = *value++; 335880c6e6aSBarry Smith } 336880c6e6aSBarry Smith } 337880c6e6aSBarry Smith } else { 3388ab52850SBarry Smith for (ii=0; ii<bs; ii++) { 339880c6e6aSBarry Smith for (jj=0; jj<bs; jj++) { 340880c6e6aSBarry Smith *bap++ = *value++; 341880c6e6aSBarry Smith } 342880c6e6aSBarry Smith } 343880c6e6aSBarry Smith } 344880c6e6aSBarry Smith noinsert2:; 345880c6e6aSBarry Smith ailen[row] = nrow; 346880c6e6aSBarry Smith PetscFunctionReturn(0); 347880c6e6aSBarry Smith } 348880c6e6aSBarry Smith 3498ab52850SBarry Smith /* 3508ab52850SBarry Smith This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 3518ab52850SBarry Smith by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 3528ab52850SBarry Smith */ 35397e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 354ab26458aSBarry Smith { 355ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 356dd6ea824SBarry Smith const PetscScalar *value; 357f15d580aSBarry Smith MatScalar *barray = baij->barray; 358ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 
359dfbe8321SBarry Smith PetscErrorCode ierr; 360899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 361899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 362d0f46423SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 363ab26458aSBarry Smith 364b16ae2b1SBarry Smith PetscFunctionBegin; 36530793edcSSatish Balay if (!barray) { 366785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 36782502324SSatish Balay baij->barray = barray; 36830793edcSSatish Balay } 36930793edcSSatish Balay 37026fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 37126fbe8dcSKarl Rupp else stepval = (m-1)*bs; 37226fbe8dcSKarl Rupp 373ab26458aSBarry Smith for (i=0; i<m; i++) { 3745ef9f2a5SBarry Smith if (im[i] < 0) continue; 3759ace16cdSJacob Faibussowitsch PetscAssertFalseDebug(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 376ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 377ab26458aSBarry Smith row = im[i] - rstart; 378ab26458aSBarry Smith for (j=0; j<n; j++) { 37915b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 38015b57d14SSatish Balay if ((roworiented) && (n == 1)) { 381f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 38215b57d14SSatish Balay } else if ((!roworiented) && (m == 1)) { 383f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 38415b57d14SSatish Balay } else { /* Here a copy is required */ 385ab26458aSBarry Smith if (roworiented) { 38653ef36baSBarry Smith value = v + (i*(stepval+bs) + j)*bs; 387ab26458aSBarry Smith } else { 38853ef36baSBarry Smith value = v + (j*(stepval+bs) + i)*bs; 389abef11f7SSatish Balay } 39053ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 39126fbe8dcSKarl Rupp for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 39253ef36baSBarry Smith barray += bs; 39347513183SBarry Smith } 39430793edcSSatish Balay barray -= bs2; 
39515b57d14SSatish Balay } 396abef11f7SSatish Balay 397abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend) { 398abef11f7SSatish Balay col = in[j] - cstart; 3998ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 40026fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 4019ace16cdSJacob Faibussowitsch else PetscAssertFalseDebug(in[j] >= baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 4029245e749SBarry Smith else { 403ab26458aSBarry Smith if (mat->was_assembled) { 404ab26458aSBarry Smith if (!baij->colmap) { 405ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 406ab26458aSBarry Smith } 407a5eb4965SSatish Balay 4082515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 409aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 410b24ad042SBarry Smith { PetscInt data; 4110f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 4129ace16cdSJacob Faibussowitsch PetscAssertFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 413fa46199cSSatish Balay } 41448e59246SSatish Balay #else 4159ace16cdSJacob Faibussowitsch PetscAssertFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 416a5eb4965SSatish Balay #endif 41748e59246SSatish Balay #endif 418aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 4190f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 420fa46199cSSatish Balay col = (col - 1)/bs; 42148e59246SSatish Balay #else 422a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 42348e59246SSatish Balay #endif 4240e9bae81SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 425ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 426ab26458aSBarry Smith col = in[j]; 4279ace16cdSJacob Faibussowitsch } else PetscAssertFalse(col < 
0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix",im[i],in[j]); 428db4deed7SKarl Rupp } else col = in[j]; 4298ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 430ab26458aSBarry Smith } 431ab26458aSBarry Smith } 432d64ed03dSBarry Smith } else { 4339ace16cdSJacob Faibussowitsch PetscAssertFalse(mat->nooffprocentries,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 434ab26458aSBarry Smith if (!baij->donotstash) { 435ff2fd236SBarry Smith if (roworiented) { 4366fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 437ff2fd236SBarry Smith } else { 4386fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 439ff2fd236SBarry Smith } 440abef11f7SSatish Balay } 441ab26458aSBarry Smith } 442ab26458aSBarry Smith } 4433a40ed3dSBarry Smith PetscFunctionReturn(0); 444ab26458aSBarry Smith } 4456fa18ffdSBarry Smith 4460bdbc534SSatish Balay #define HASH_KEY 0.6180339887 447b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 448b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 449b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 45097e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 4510bdbc534SSatish Balay { 4520bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 453ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 454dfbe8321SBarry Smith PetscErrorCode ierr; 455b24ad042SBarry Smith PetscInt i,j,row,col; 
456d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 457d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 458d0f46423SBarry Smith PetscInt h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 459329f5518SBarry Smith PetscReal tmp; 4603eda8832SBarry Smith MatScalar **HD = baij->hd,value; 461b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4620bdbc534SSatish Balay 4630bdbc534SSatish Balay PetscFunctionBegin; 4640bdbc534SSatish Balay for (i=0; i<m; i++) { 46576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 4669ace16cdSJacob Faibussowitsch PetscAssertFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 4679ace16cdSJacob Faibussowitsch PetscAssertFalse(im[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],mat->rmap->N-1); 46876bd3646SJed Brown } 4690bdbc534SSatish Balay row = im[i]; 470c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 4710bdbc534SSatish Balay for (j=0; j<n; j++) { 4720bdbc534SSatish Balay col = in[j]; 473db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 474db4deed7SKarl Rupp else value = v[i+j*m]; 475b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 476c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 477c2760754SSatish Balay h1 = HASH(size,key,tmp); 4780bdbc534SSatish Balay 479c2760754SSatish Balay idx = h1; 48076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 481187ce0cbSSatish Balay insert_ct++; 482187ce0cbSSatish Balay total_ct++; 483187ce0cbSSatish Balay if (HT[idx] != key) { 484187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 485187ce0cbSSatish Balay if (idx == size) { 486187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 4879ace16cdSJacob Faibussowitsch PetscAssertFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in 
the hash table", row, col); 488187ce0cbSSatish Balay } 489187ce0cbSSatish Balay } 49076bd3646SJed Brown } else if (HT[idx] != key) { 491c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 492c2760754SSatish Balay if (idx == size) { 493c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 4949ace16cdSJacob Faibussowitsch PetscAssertFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 495c2760754SSatish Balay } 496c2760754SSatish Balay } 497c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 498c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 499c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 5000bdbc534SSatish Balay } 50126fbe8dcSKarl Rupp } else if (!baij->donotstash) { 502ff2fd236SBarry Smith if (roworiented) { 503b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 504ff2fd236SBarry Smith } else { 505b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 5060bdbc534SSatish Balay } 5070bdbc534SSatish Balay } 5080bdbc534SSatish Balay } 50976bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 510abf3b562SBarry Smith baij->ht_total_ct += total_ct; 511abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 51276bd3646SJed Brown } 5130bdbc534SSatish Balay PetscFunctionReturn(0); 5140bdbc534SSatish Balay } 5150bdbc534SSatish Balay 51697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 5170bdbc534SSatish Balay { 5180bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 519ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 520dfbe8321SBarry Smith PetscErrorCode ierr; 
521b24ad042SBarry Smith PetscInt i,j,ii,jj,row,col; 522899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 523d0f46423SBarry Smith PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 524b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 525329f5518SBarry Smith PetscReal tmp; 5263eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 527dd6ea824SBarry Smith const PetscScalar *v_t,*value; 528b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 5290bdbc534SSatish Balay 530d0a41580SSatish Balay PetscFunctionBegin; 53126fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 53226fbe8dcSKarl Rupp else stepval = (m-1)*bs; 53326fbe8dcSKarl Rupp 5340bdbc534SSatish Balay for (i=0; i<m; i++) { 53576bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 5369ace16cdSJacob Faibussowitsch PetscAssertFalse(im[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,im[i]); 5379ace16cdSJacob Faibussowitsch PetscAssertFalse(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 53876bd3646SJed Brown } 5390bdbc534SSatish Balay row = im[i]; 540ab715e2cSSatish Balay v_t = v + i*nbs2; 541c2760754SSatish Balay if (row >= rstart && row < rend) { 5420bdbc534SSatish Balay for (j=0; j<n; j++) { 5430bdbc534SSatish Balay col = in[j]; 5440bdbc534SSatish Balay 5450bdbc534SSatish Balay /* Look up into the Hash Table */ 546c2760754SSatish Balay key = row*Nbs+col+1; 547c2760754SSatish Balay h1 = HASH(size,key,tmp); 5480bdbc534SSatish Balay 549c2760754SSatish Balay idx = h1; 55076bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 551187ce0cbSSatish Balay total_ct++; 552187ce0cbSSatish Balay insert_ct++; 553187ce0cbSSatish Balay if (HT[idx] != key) { 554187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 555187ce0cbSSatish Balay if (idx == size) { 556187ce0cbSSatish Balay for 
(idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 5579ace16cdSJacob Faibussowitsch PetscAssertFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 558187ce0cbSSatish Balay } 559187ce0cbSSatish Balay } 56076bd3646SJed Brown } else if (HT[idx] != key) { 561c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 562c2760754SSatish Balay if (idx == size) { 563c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 5649ace16cdSJacob Faibussowitsch PetscAssertFalse(idx == h1,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 565c2760754SSatish Balay } 566c2760754SSatish Balay } 567c2760754SSatish Balay baij_a = HD[idx]; 5680bdbc534SSatish Balay if (roworiented) { 569c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 570187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 571187ce0cbSSatish Balay value = v_t; 572187ce0cbSSatish Balay v_t += bs; 573fef45726SSatish Balay if (addv == ADD_VALUES) { 574c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 575c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 576fef45726SSatish Balay baij_a[jj] += *value++; 577b4cc0f5aSSatish Balay } 578b4cc0f5aSSatish Balay } 579fef45726SSatish Balay } else { 580c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 581c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 582fef45726SSatish Balay baij_a[jj] = *value++; 583fef45726SSatish Balay } 584fef45726SSatish Balay } 585fef45726SSatish Balay } 5860bdbc534SSatish Balay } else { 5870bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 588fef45726SSatish Balay if (addv == ADD_VALUES) { 589b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 5900bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 591fef45726SSatish Balay baij_a[jj] += *value++; 592fef45726SSatish Balay } 
593fef45726SSatish Balay } 594fef45726SSatish Balay } else { 595fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 596fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 597fef45726SSatish Balay baij_a[jj] = *value++; 598fef45726SSatish Balay } 599b4cc0f5aSSatish Balay } 6000bdbc534SSatish Balay } 6010bdbc534SSatish Balay } 6020bdbc534SSatish Balay } 6030bdbc534SSatish Balay } else { 6040bdbc534SSatish Balay if (!baij->donotstash) { 6050bdbc534SSatish Balay if (roworiented) { 6068798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6070bdbc534SSatish Balay } else { 6088798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 6090bdbc534SSatish Balay } 6100bdbc534SSatish Balay } 6110bdbc534SSatish Balay } 6120bdbc534SSatish Balay } 61376bd3646SJed Brown if (PetscDefined(USE_DEBUG)) { 614abf3b562SBarry Smith baij->ht_total_ct += total_ct; 615abf3b562SBarry Smith baij->ht_insert_ct += insert_ct; 61676bd3646SJed Brown } 6170bdbc534SSatish Balay PetscFunctionReturn(0); 6180bdbc534SSatish Balay } 619133cdb44SSatish Balay 620b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 621d6de1c52SSatish Balay { 622d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 6236849ba73SBarry Smith PetscErrorCode ierr; 624d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 625d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 626d6de1c52SSatish Balay 627133cdb44SSatish Balay PetscFunctionBegin; 628d6de1c52SSatish Balay for (i=0; i<m; i++) { 62998921bdaSJacob Faibussowitsch if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %" PetscInt_FMT,idxm[i]);*/ 6309ace16cdSJacob Faibussowitsch PetscAssertFalse(idxm[i] >= 
mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT,idxm[i],mat->rmap->N-1); 631d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 632d6de1c52SSatish Balay row = idxm[i] - bsrstart; 633d6de1c52SSatish Balay for (j=0; j<n; j++) { 63498921bdaSJacob Faibussowitsch if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %" PetscInt_FMT,idxn[j]); */ 6359ace16cdSJacob Faibussowitsch PetscAssertFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,idxn[j],mat->cmap->N-1); 636d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 637d6de1c52SSatish Balay col = idxn[j] - bscstart; 63898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 639d64ed03dSBarry Smith } else { 640905e6a2fSBarry Smith if (!baij->colmap) { 641ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 642905e6a2fSBarry Smith } 643aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 6440f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 645fa46199cSSatish Balay data--; 64648e59246SSatish Balay #else 64748e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 64848e59246SSatish Balay #endif 64948e59246SSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 650d9d09a02SSatish Balay else { 65148e59246SSatish Balay col = data + idxn[j]%bs; 65298dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 653d6de1c52SSatish Balay } 654d6de1c52SSatish Balay } 655d6de1c52SSatish Balay } 656f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 657d6de1c52SSatish Balay } 6583a40ed3dSBarry Smith PetscFunctionReturn(0); 659d6de1c52SSatish Balay } 660d6de1c52SSatish Balay 661dfbe8321SBarry 
Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 662d6de1c52SSatish Balay { 663d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 664d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 665dfbe8321SBarry Smith PetscErrorCode ierr; 666d0f46423SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 667329f5518SBarry Smith PetscReal sum = 0.0; 6683eda8832SBarry Smith MatScalar *v; 669d6de1c52SSatish Balay 670d64ed03dSBarry Smith PetscFunctionBegin; 671d6de1c52SSatish Balay if (baij->size == 1) { 672064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 673d6de1c52SSatish Balay } else { 674d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 675d6de1c52SSatish Balay v = amat->a; 6768a62d963SHong Zhang nz = amat->nz*bs2; 6778a62d963SHong Zhang for (i=0; i<nz; i++) { 678329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 679d6de1c52SSatish Balay } 680d6de1c52SSatish Balay v = bmat->a; 6818a62d963SHong Zhang nz = bmat->nz*bs2; 6828a62d963SHong Zhang for (i=0; i<nz; i++) { 683329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 684d6de1c52SSatish Balay } 685820f2d46SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 6868f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 6878a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 6888a62d963SHong Zhang PetscReal *tmp,*tmp2; 689899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 6908f8f2f0dSBarry Smith ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr); 691857a15f1SBarry Smith ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr); 6928a62d963SHong Zhang v = amat->a; jj = amat->j; 6938a62d963SHong Zhang for (i=0; i<amat->nz; i++) { 6948a62d963SHong Zhang for (j=0; j<bs; j++) { 6958a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 6968a62d963SHong Zhang 
for (row=0; row<bs; row++) { 6978a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6988a62d963SHong Zhang } 6998a62d963SHong Zhang } 7008a62d963SHong Zhang jj++; 7018a62d963SHong Zhang } 7028a62d963SHong Zhang v = bmat->a; jj = bmat->j; 7038a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 7048a62d963SHong Zhang for (j=0; j<bs; j++) { 7058a62d963SHong Zhang col = bs*garray[*jj] + j; 7068a62d963SHong Zhang for (row=0; row<bs; row++) { 7078a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 7088a62d963SHong Zhang } 7098a62d963SHong Zhang } 7108a62d963SHong Zhang jj++; 7118a62d963SHong Zhang } 712820f2d46SBarry Smith ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 7138a62d963SHong Zhang *nrm = 0.0; 714d0f46423SBarry Smith for (j=0; j<mat->cmap->N; j++) { 7158a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 7168a62d963SHong Zhang } 717857a15f1SBarry Smith ierr = PetscFree(tmp);CHKERRQ(ierr); 718857a15f1SBarry Smith ierr = PetscFree(tmp2);CHKERRQ(ierr); 7198a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 720577dd1f9SKris Buschelman PetscReal *sums; 721785e854fSJed Brown ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 7228a62d963SHong Zhang sum = 0.0; 7238a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 7248a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 7258a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 7268a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 7278a62d963SHong Zhang for (i=0; i<nz; i++) { 7288a62d963SHong Zhang for (col=0; col<bs; col++) { 7298a62d963SHong Zhang for (row=0; row<bs; row++) { 7308a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 7318a62d963SHong Zhang } 7328a62d963SHong Zhang } 7338a62d963SHong Zhang } 7348a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 7358a62d963SHong Zhang nz = bmat->i[j+1]-bmat->i[j]; 7368a62d963SHong Zhang for (i=0; i<nz; i++) { 7378a62d963SHong Zhang for (col=0; col<bs; col++) { 7388a62d963SHong 
Zhang for (row=0; row<bs; row++) { 7398a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 7408a62d963SHong Zhang } 7418a62d963SHong Zhang } 7428a62d963SHong Zhang } 7438a62d963SHong Zhang for (row=0; row<bs; row++) { 7448a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 7458a62d963SHong Zhang } 7468a62d963SHong Zhang } 747820f2d46SBarry Smith ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 748577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 749ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 750d64ed03dSBarry Smith } 7513a40ed3dSBarry Smith PetscFunctionReturn(0); 752d6de1c52SSatish Balay } 75357b952d6SSatish Balay 754fef45726SSatish Balay /* 755fef45726SSatish Balay Creates the hash table, and sets the table 756fef45726SSatish Balay This table is created only once. 757fef45726SSatish Balay If new entried need to be added to the matrix 758fef45726SSatish Balay then the hash table has to be destroyed and 759fef45726SSatish Balay recreated. 
760fef45726SSatish Balay */ 761dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 762596b8d2eSBarry Smith { 763596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 764596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 765596b8d2eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 766b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7676849ba73SBarry Smith PetscErrorCode ierr; 768fca92195SBarry Smith PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 769899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 770b24ad042SBarry Smith PetscInt *HT,key; 7713eda8832SBarry Smith MatScalar **HD; 772329f5518SBarry Smith PetscReal tmp; 7736cf91177SBarry Smith #if defined(PETSC_USE_INFO) 774b24ad042SBarry Smith PetscInt ct=0,max=0; 7754a15367fSSatish Balay #endif 776fef45726SSatish Balay 777d64ed03dSBarry Smith PetscFunctionBegin; 778fca92195SBarry Smith if (baij->ht) PetscFunctionReturn(0); 779fef45726SSatish Balay 780fca92195SBarry Smith baij->ht_size = (PetscInt)(factor*nz); 781fca92195SBarry Smith ht_size = baij->ht_size; 7820bdbc534SSatish Balay 783fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7841795a4d1SJed Brown ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 785b9e4cc15SSatish Balay HD = baij->hd; 786a07cd24cSSatish Balay HT = baij->ht; 787b9e4cc15SSatish Balay 788596b8d2eSBarry Smith /* Loop Over A */ 7890bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 790596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 7910bdbc534SSatish Balay row = i+rstart; 7920bdbc534SSatish Balay col = aj[j]+cstart; 793596b8d2eSBarry Smith 794187ce0cbSSatish Balay key = row*Nbs + col + 1; 795fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 796fca92195SBarry Smith for (k=0; k<ht_size; k++) { 797fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 798fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 
799fca92195SBarry Smith HD[(h1+k)%ht_size] = a->a + j*bs2; 800596b8d2eSBarry Smith break; 8016cf91177SBarry Smith #if defined(PETSC_USE_INFO) 802187ce0cbSSatish Balay } else { 803187ce0cbSSatish Balay ct++; 804187ce0cbSSatish Balay #endif 805596b8d2eSBarry Smith } 806187ce0cbSSatish Balay } 8076cf91177SBarry Smith #if defined(PETSC_USE_INFO) 808187ce0cbSSatish Balay if (k> max) max = k; 809187ce0cbSSatish Balay #endif 810596b8d2eSBarry Smith } 811596b8d2eSBarry Smith } 812596b8d2eSBarry Smith /* Loop Over B */ 8130bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 814596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 8150bdbc534SSatish Balay row = i+rstart; 8160bdbc534SSatish Balay col = garray[bj[j]]; 817187ce0cbSSatish Balay key = row*Nbs + col + 1; 818fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 819fca92195SBarry Smith for (k=0; k<ht_size; k++) { 820fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 821fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 822fca92195SBarry Smith HD[(h1+k)%ht_size] = b->a + j*bs2; 823596b8d2eSBarry Smith break; 8246cf91177SBarry Smith #if defined(PETSC_USE_INFO) 825187ce0cbSSatish Balay } else { 826187ce0cbSSatish Balay ct++; 827187ce0cbSSatish Balay #endif 828596b8d2eSBarry Smith } 829187ce0cbSSatish Balay } 8306cf91177SBarry Smith #if defined(PETSC_USE_INFO) 831187ce0cbSSatish Balay if (k> max) max = k; 832187ce0cbSSatish Balay #endif 833596b8d2eSBarry Smith } 834596b8d2eSBarry Smith } 835596b8d2eSBarry Smith 836596b8d2eSBarry Smith /* Print Summary */ 8376cf91177SBarry Smith #if defined(PETSC_USE_INFO) 838fca92195SBarry Smith for (i=0,j=0; i<ht_size; i++) { 83926fbe8dcSKarl Rupp if (HT[i]) j++; 840c38d4ed2SBarry Smith } 8417d3de750SJacob Faibussowitsch ierr = PetscInfo(mat,"Average Search = %5.2g,max search = %" PetscInt_FMT "\n",(!j) ? 
(double)0.0:(double)(((PetscReal)(ct+j))/(double)j),max);CHKERRQ(ierr); 842187ce0cbSSatish Balay #endif 8433a40ed3dSBarry Smith PetscFunctionReturn(0); 844596b8d2eSBarry Smith } 84557b952d6SSatish Balay 846dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 847bbb85fb3SSatish Balay { 848bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 849dfbe8321SBarry Smith PetscErrorCode ierr; 850b24ad042SBarry Smith PetscInt nstash,reallocs; 851bbb85fb3SSatish Balay 852bbb85fb3SSatish Balay PetscFunctionBegin; 85326fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 854bbb85fb3SSatish Balay 855d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 8561e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 8578798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8587d3de750SJacob Faibussowitsch ierr = PetscInfo(mat,"Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 85946680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 8607d3de750SJacob Faibussowitsch ierr = PetscInfo(mat,"Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 861bbb85fb3SSatish Balay PetscFunctionReturn(0); 862bbb85fb3SSatish Balay } 863bbb85fb3SSatish Balay 864dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 865bbb85fb3SSatish Balay { 866bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 86791c97fd4SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 8686849ba73SBarry Smith PetscErrorCode ierr; 869b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 870e44c0bd4SBarry Smith PetscInt *row,*col; 871ace3abfcSBarry Smith PetscBool r1,r2,r3,other_disassembled; 
8723eda8832SBarry Smith MatScalar *val; 873b24ad042SBarry Smith PetscMPIInt n; 874bbb85fb3SSatish Balay 875bbb85fb3SSatish Balay PetscFunctionBegin; 8765fd66863SKarl Rupp /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 8774cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 878a2d1c673SSatish Balay while (1) { 8798798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 880a2d1c673SSatish Balay if (!flg) break; 881a2d1c673SSatish Balay 882bbb85fb3SSatish Balay for (i=0; i<n;) { 883bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 88426fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 88526fbe8dcSKarl Rupp if (row[j] != rstart) break; 88626fbe8dcSKarl Rupp } 887bbb85fb3SSatish Balay if (j < n) ncols = j-i; 888bbb85fb3SSatish Balay else ncols = n-i; 889bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 8904b4eb8d3SJed Brown ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 891bbb85fb3SSatish Balay i = j; 892bbb85fb3SSatish Balay } 893bbb85fb3SSatish Balay } 8948798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 895a2d1c673SSatish Balay /* Now process the block-stash. 
Since the values are stashed column-oriented, 896a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 897a2d1c673SSatish Balay restore the original flags */ 898a2d1c673SSatish Balay r1 = baij->roworiented; 899a2d1c673SSatish Balay r2 = a->roworiented; 90091c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 90126fbe8dcSKarl Rupp 9027c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 9037c922b88SBarry Smith a->roworiented = PETSC_FALSE; 90426fbe8dcSKarl Rupp 90591c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 906a2d1c673SSatish Balay while (1) { 9078798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 908a2d1c673SSatish Balay if (!flg) break; 909a2d1c673SSatish Balay 910a2d1c673SSatish Balay for (i=0; i<n;) { 911a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 91226fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 91326fbe8dcSKarl Rupp if (row[j] != rstart) break; 91426fbe8dcSKarl Rupp } 915a2d1c673SSatish Balay if (j < n) ncols = j-i; 916a2d1c673SSatish Balay else ncols = n-i; 9174b4eb8d3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr); 918a2d1c673SSatish Balay i = j; 919a2d1c673SSatish Balay } 920a2d1c673SSatish Balay } 9218798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 92226fbe8dcSKarl Rupp 923a2d1c673SSatish Balay baij->roworiented = r1; 924a2d1c673SSatish Balay a->roworiented = r2; 92526fbe8dcSKarl Rupp 92691c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 927bbb85fb3SSatish Balay } 928bbb85fb3SSatish Balay 929bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 930bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 931bbb85fb3SSatish Balay 932bbb85fb3SSatish Balay /* 
determine if any processor has disassembled, if so we must 933bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 934bbb85fb3SSatish Balay /* 935bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 936bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 937bbb85fb3SSatish Balay */ 938bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 939820f2d46SBarry Smith ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 940bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 941ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 942bbb85fb3SSatish Balay } 943bbb85fb3SSatish Balay } 944bbb85fb3SSatish Balay 945bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 946bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 947bbb85fb3SSatish Balay } 948bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 949bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 950bbb85fb3SSatish Balay 9516cf91177SBarry Smith #if defined(PETSC_USE_INFO) 952bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 9537d3de750SJacob Faibussowitsch ierr = PetscInfo(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 95426fbe8dcSKarl Rupp 955bbb85fb3SSatish Balay baij->ht_total_ct = 0; 956bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 957bbb85fb3SSatish Balay } 958bbb85fb3SSatish Balay #endif 959bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 960bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 96126fbe8dcSKarl Rupp 962bbb85fb3SSatish Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 963bbb85fb3SSatish Balay 
mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 964bbb85fb3SSatish Balay } 965bbb85fb3SSatish Balay 966fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 96726fbe8dcSKarl Rupp 968f4259b30SLisandro Dalcin baij->rowvalues = NULL; 9694f9cfa9eSBarry Smith 9704f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 9714f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 972e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 973820f2d46SBarry Smith ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 974e56f5c9eSBarry Smith } 975bbb85fb3SSatish Balay PetscFunctionReturn(0); 976bbb85fb3SSatish Balay } 97757b952d6SSatish Balay 9787da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 9799804daf3SBarry Smith #include <petscdraw.h> 9806849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 98157b952d6SSatish Balay { 98257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 983dfbe8321SBarry Smith PetscErrorCode ierr; 9847da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 985d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 986ace3abfcSBarry Smith PetscBool iascii,isdraw; 987b0a32e0cSBarry Smith PetscViewer sviewer; 988f3ef73ceSBarry Smith PetscViewerFormat format; 98957b952d6SSatish Balay 990d64ed03dSBarry Smith PetscFunctionBegin; 991251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 992251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 99332077d6dSBarry Smith if (iascii) { 994b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 
995456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 9964e220ebcSLois Curfman McInnes MatInfo info; 997ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr); 998d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 9991575c14dSBarry Smith ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1000c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", 1001b1e9c6f1SBarry Smith rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr); 1002d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1003c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1004d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1005c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %" PetscInt_FMT " \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1006b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 10071575c14dSBarry Smith ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 100807d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 100957b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 10103a40ed3dSBarry Smith PetscFunctionReturn(0); 1011fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 1012c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," block size is %" PetscInt_FMT "\n",bs);CHKERRQ(ierr); 10133a40ed3dSBarry Smith PetscFunctionReturn(0); 101404929863SHong Zhang } else if (format == 
PETSC_VIEWER_ASCII_FACTOR_INFO) { 101504929863SHong Zhang PetscFunctionReturn(0); 101657b952d6SSatish Balay } 101757b952d6SSatish Balay } 101857b952d6SSatish Balay 10190f5bd95cSBarry Smith if (isdraw) { 1020b0a32e0cSBarry Smith PetscDraw draw; 1021ace3abfcSBarry Smith PetscBool isnull; 1022b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 102345f3bb6eSLisandro Dalcin ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 102445f3bb6eSLisandro Dalcin if (isnull) PetscFunctionReturn(0); 102557b952d6SSatish Balay } 102657b952d6SSatish Balay 10277da1fb6eSBarry Smith { 102857b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 102957b952d6SSatish Balay Mat A; 103057b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 1031d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 10323eda8832SBarry Smith MatScalar *a; 10333e219373SBarry Smith const char *matname; 103457b952d6SSatish Balay 1035f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1036f204ca49SKris Buschelman /* Perhaps this should be the type of mat? 
*/ 1037ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1038dd400576SPatrick Sanan if (rank == 0) { 1039f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1040d64ed03dSBarry Smith } else { 1041f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 104257b952d6SSatish Balay } 1043f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 10440298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 10452b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 10463bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 104757b952d6SSatish Balay 104857b952d6SSatish Balay /* copy over the A part */ 104957b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 105057b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1051785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 105257b952d6SSatish Balay 105357b952d6SSatish Balay for (i=0; i<mbs; i++) { 1054899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 105526fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 105657b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1057899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 105857b952d6SSatish Balay for (k=0; k<bs; k++) { 105997e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1060cee3aa6bSSatish Balay col++; a += bs; 106157b952d6SSatish Balay } 106257b952d6SSatish Balay } 106357b952d6SSatish Balay } 106457b952d6SSatish Balay /* copy over the B part */ 106557b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 106657b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 106757b952d6SSatish Balay for (i=0; i<mbs; i++) { 1068899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 106926fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 
107057b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 107157b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 107257b952d6SSatish Balay for (k=0; k<bs; k++) { 107397e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1074cee3aa6bSSatish Balay col++; a += bs; 107557b952d6SSatish Balay } 107657b952d6SSatish Balay } 107757b952d6SSatish Balay } 1078606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 10796d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10806d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 108155843e3eSBarry Smith /* 108255843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1083b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 108455843e3eSBarry Smith */ 10853f08860eSBarry Smith ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1086ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 1087dd400576SPatrick Sanan if (rank == 0) { 1088ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 10897da1fb6eSBarry Smith ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 109057b952d6SSatish Balay } 10913f08860eSBarry Smith ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 10921575c14dSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 10936bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 109457b952d6SSatish Balay } 10953a40ed3dSBarry Smith PetscFunctionReturn(0); 109657b952d6SSatish Balay } 109757b952d6SSatish Balay 1098618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1099b51a4376SLisandro Dalcin PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1100660746e0SBarry Smith { 1101b51a4376SLisandro Dalcin Mat_MPIBAIJ *aij = 
(Mat_MPIBAIJ*)mat->data; 1102b51a4376SLisandro Dalcin Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)aij->A->data; 1103b51a4376SLisandro Dalcin Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)aij->B->data; 1104b51a4376SLisandro Dalcin const PetscInt *garray = aij->garray; 1105b51a4376SLisandro Dalcin PetscInt header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l; 1106b51a4376SLisandro Dalcin PetscInt *rowlens,*colidxs; 1107b51a4376SLisandro Dalcin PetscScalar *matvals; 1108660746e0SBarry Smith PetscErrorCode ierr; 1109660746e0SBarry Smith 1110660746e0SBarry Smith PetscFunctionBegin; 1111b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1112b51a4376SLisandro Dalcin 1113b51a4376SLisandro Dalcin M = mat->rmap->N; 1114b51a4376SLisandro Dalcin N = mat->cmap->N; 1115b51a4376SLisandro Dalcin m = mat->rmap->n; 1116b51a4376SLisandro Dalcin rs = mat->rmap->rstart; 1117b51a4376SLisandro Dalcin cs = mat->cmap->rstart; 1118b51a4376SLisandro Dalcin bs = mat->rmap->bs; 1119b51a4376SLisandro Dalcin nz = bs*bs*(A->nz + B->nz); 1120b51a4376SLisandro Dalcin 1121b51a4376SLisandro Dalcin /* write matrix header */ 1122660746e0SBarry Smith header[0] = MAT_FILE_CLASSID; 1123b51a4376SLisandro Dalcin header[1] = M; header[2] = N; header[3] = nz; 1124ffc4695bSBarry Smith ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr); 1125b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1126660746e0SBarry Smith 1127b51a4376SLisandro Dalcin /* fill in and store row lengths */ 1128b51a4376SLisandro Dalcin ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1129b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) 1130b51a4376SLisandro Dalcin for (j=0; j<bs; j++) 1131b51a4376SLisandro Dalcin rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]); 1132b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1133b51a4376SLisandro Dalcin ierr = 
PetscFree(rowlens);CHKERRQ(ierr); 1134660746e0SBarry Smith 1135b51a4376SLisandro Dalcin /* fill in and store column indices */ 1136b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1137b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) { 1138b51a4376SLisandro Dalcin for (k=0; k<bs; k++) { 1139b51a4376SLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1140b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs/bs) break; 1141b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1142b51a4376SLisandro Dalcin colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1143660746e0SBarry Smith } 1144b51a4376SLisandro Dalcin for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1145b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1146b51a4376SLisandro Dalcin colidxs[cnt++] = bs*A->j[ja] + l + cs; 1147b51a4376SLisandro Dalcin for (; jb<B->i[i+1]; jb++) 1148b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1149b51a4376SLisandro Dalcin colidxs[cnt++] = bs*garray[B->j[jb]] + l; 1150660746e0SBarry Smith } 1151660746e0SBarry Smith } 11529ace16cdSJacob Faibussowitsch PetscAssertFalse(cnt != nz,PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT,cnt,nz); 1153b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr); 1154b51a4376SLisandro Dalcin ierr = PetscFree(colidxs);CHKERRQ(ierr); 1155660746e0SBarry Smith 1156b51a4376SLisandro Dalcin /* fill in and store nonzero values */ 1157b51a4376SLisandro Dalcin ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1158b51a4376SLisandro Dalcin for (cnt=0, i=0; i<A->mbs; i++) { 1159b51a4376SLisandro Dalcin for (k=0; k<bs; k++) { 1160b51a4376SLisandro Dalcin for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1161b51a4376SLisandro Dalcin if (garray[B->j[jb]] > cs/bs) break; 1162b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1163b51a4376SLisandro Dalcin matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1164660746e0SBarry Smith } 1165b51a4376SLisandro Dalcin for (ja=A->i[i]; 
ja<A->i[i+1]; ja++) 1166b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1167b51a4376SLisandro Dalcin matvals[cnt++] = A->a[bs*(bs*ja + l) + k]; 1168b51a4376SLisandro Dalcin for (; jb<B->i[i+1]; jb++) 1169b51a4376SLisandro Dalcin for (l=0; l<bs; l++) 1170d21b9a37SPierre Jolivet matvals[cnt++] = B->a[bs*(bs*jb + l) + k]; 1171660746e0SBarry Smith } 1172b51a4376SLisandro Dalcin } 1173b51a4376SLisandro Dalcin ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr); 1174b51a4376SLisandro Dalcin ierr = PetscFree(matvals);CHKERRQ(ierr); 1175660746e0SBarry Smith 1176b51a4376SLisandro Dalcin /* write block size option to the viewer's .info file */ 1177b51a4376SLisandro Dalcin ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1178660746e0SBarry Smith PetscFunctionReturn(0); 1179660746e0SBarry Smith } 1180660746e0SBarry Smith 1181dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 118257b952d6SSatish Balay { 1183dfbe8321SBarry Smith PetscErrorCode ierr; 1184ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 118557b952d6SSatish Balay 1186d64ed03dSBarry Smith PetscFunctionBegin; 1187251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1188251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1189251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1190251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1191660746e0SBarry Smith if (iascii || isdraw || issocket) { 11927b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1193660746e0SBarry Smith } else if (isbinary) { 1194660746e0SBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 119557b952d6SSatish Balay } 11963a40ed3dSBarry Smith 
PetscFunctionReturn(0); 119757b952d6SSatish Balay } 119857b952d6SSatish Balay 1199dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 120079bdfe76SSatish Balay { 120179bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1202dfbe8321SBarry Smith PetscErrorCode ierr; 120379bdfe76SSatish Balay 1204d64ed03dSBarry Smith PetscFunctionBegin; 1205aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1206c0aa6a63SJacob Faibussowitsch PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 120779bdfe76SSatish Balay #endif 12088798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12098798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 12106bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 12116bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1212aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 12136bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 121448e59246SSatish Balay #else 121505b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 121648e59246SSatish Balay #endif 121705b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 12186bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 12196bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1220fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 122105b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1222fca92195SBarry Smith ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1223899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1224bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1225901853e0SKris Buschelman 1226f4259b30SLisandro Dalcin ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr); 1227bdf89e91SBarry Smith ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1228bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1229bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1230bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1231bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1232bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1233bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1234bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 12357ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 12367ea3e4caSstefano_zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr); 12377ea3e4caSstefano_zampini #endif 1238c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr); 12393a40ed3dSBarry Smith PetscFunctionReturn(0); 124079bdfe76SSatish Balay } 124179bdfe76SSatish Balay 1242dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1243cee3aa6bSSatish Balay { 1244cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1245dfbe8321SBarry Smith PetscErrorCode ierr; 1246b24ad042SBarry Smith PetscInt nt; 1247cee3aa6bSSatish Balay 1248d64ed03dSBarry Smith PetscFunctionBegin; 1249e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 12509ace16cdSJacob Faibussowitsch PetscAssertFalse(nt != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1251e1311b90SBarry Smith ierr = 
VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 12529ace16cdSJacob Faibussowitsch PetscAssertFalse(nt != A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1253ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1254f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1255ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1256f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 12573a40ed3dSBarry Smith PetscFunctionReturn(0); 1258cee3aa6bSSatish Balay } 1259cee3aa6bSSatish Balay 1260dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1261cee3aa6bSSatish Balay { 1262cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1263dfbe8321SBarry Smith PetscErrorCode ierr; 1264d64ed03dSBarry Smith 1265d64ed03dSBarry Smith PetscFunctionBegin; 1266ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1267f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1268ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1269f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 12703a40ed3dSBarry Smith PetscFunctionReturn(0); 1271cee3aa6bSSatish Balay } 1272cee3aa6bSSatish Balay 1273dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1274cee3aa6bSSatish Balay { 1275cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1276dfbe8321SBarry Smith PetscErrorCode ierr; 1277cee3aa6bSSatish Balay 1278d64ed03dSBarry Smith PetscFunctionBegin; 1279cee3aa6bSSatish Balay /* do nondiagonal part */ 12807c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1281cee3aa6bSSatish Balay /* do local part */ 12827c922b88SBarry Smith ierr = 
(*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1283e4a140f6SJunchao Zhang /* add partial results together */ 1284ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1285ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 12863a40ed3dSBarry Smith PetscFunctionReturn(0); 1287cee3aa6bSSatish Balay } 1288cee3aa6bSSatish Balay 1289dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1290cee3aa6bSSatish Balay { 1291cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1292dfbe8321SBarry Smith PetscErrorCode ierr; 1293cee3aa6bSSatish Balay 1294d64ed03dSBarry Smith PetscFunctionBegin; 1295cee3aa6bSSatish Balay /* do nondiagonal part */ 12967c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1297cee3aa6bSSatish Balay /* do local part */ 12987c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1299e4a140f6SJunchao Zhang /* add partial results together */ 1300e4a140f6SJunchao Zhang ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1301ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 13023a40ed3dSBarry Smith PetscFunctionReturn(0); 1303cee3aa6bSSatish Balay } 1304cee3aa6bSSatish Balay 1305cee3aa6bSSatish Balay /* 1306cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1307cee3aa6bSSatish Balay diagonal block 1308cee3aa6bSSatish Balay */ 1309dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1310cee3aa6bSSatish Balay { 1311cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1312dfbe8321SBarry Smith PetscErrorCode ierr; 1313d64ed03dSBarry Smith 1314d64ed03dSBarry Smith PetscFunctionBegin; 13159ace16cdSJacob Faibussowitsch PetscAssertFalse(A->rmap->N != 
A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13163a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 13173a40ed3dSBarry Smith PetscFunctionReturn(0); 1318cee3aa6bSSatish Balay } 1319cee3aa6bSSatish Balay 1320f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1321cee3aa6bSSatish Balay { 1322cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1323dfbe8321SBarry Smith PetscErrorCode ierr; 1324d64ed03dSBarry Smith 1325d64ed03dSBarry Smith PetscFunctionBegin; 1326f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1327f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 13283a40ed3dSBarry Smith PetscFunctionReturn(0); 1329cee3aa6bSSatish Balay } 1330026e39d0SSatish Balay 1331b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1332acdf5bf4SSatish Balay { 1333acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 133487828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 13356849ba73SBarry Smith PetscErrorCode ierr; 1336d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1337d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1338899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1339acdf5bf4SSatish Balay 1340d64ed03dSBarry Smith PetscFunctionBegin; 13419ace16cdSJacob Faibussowitsch PetscAssertFalse(row < brstart || row >= brend,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 13429ace16cdSJacob Faibussowitsch PetscAssertFalse(mat->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1343acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1344acdf5bf4SSatish Balay 1345acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1346acdf5bf4SSatish Balay /* 1347acdf5bf4SSatish Balay allocate enough space to hold 
information from the longest row. 1348acdf5bf4SSatish Balay */ 1349acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1350b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1351bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1352acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 135326fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1354acdf5bf4SSatish Balay } 1355dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1356acdf5bf4SSatish Balay } 1357d9d09a02SSatish Balay lrow = row - brstart; 1358acdf5bf4SSatish Balay 1359acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1360f4259b30SLisandro Dalcin if (!v) {pvA = NULL; pvB = NULL;} 1361f4259b30SLisandro Dalcin if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1362f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1363f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1364acdf5bf4SSatish Balay nztot = nzA + nzB; 1365acdf5bf4SSatish Balay 1366acdf5bf4SSatish Balay cmap = mat->garray; 1367acdf5bf4SSatish Balay if (v || idx) { 1368acdf5bf4SSatish Balay if (nztot) { 1369acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1370b24ad042SBarry Smith PetscInt imark = -1; 1371acdf5bf4SSatish Balay if (v) { 1372acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1373acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1374d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1375acdf5bf4SSatish Balay else break; 1376acdf5bf4SSatish Balay } 1377acdf5bf4SSatish Balay imark = i; 1378acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1379acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1380acdf5bf4SSatish Balay } 1381acdf5bf4SSatish Balay if (idx) { 1382acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 
1383acdf5bf4SSatish Balay if (imark > -1) { 1384acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1385bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1386acdf5bf4SSatish Balay } 1387acdf5bf4SSatish Balay } else { 1388acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 138926fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1390acdf5bf4SSatish Balay else break; 1391acdf5bf4SSatish Balay } 1392acdf5bf4SSatish Balay imark = i; 1393acdf5bf4SSatish Balay } 1394d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1395d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1396acdf5bf4SSatish Balay } 1397d64ed03dSBarry Smith } else { 1398f4259b30SLisandro Dalcin if (idx) *idx = NULL; 1399f4259b30SLisandro Dalcin if (v) *v = NULL; 1400d212a18eSSatish Balay } 1401acdf5bf4SSatish Balay } 1402acdf5bf4SSatish Balay *nz = nztot; 1403f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1404f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14053a40ed3dSBarry Smith PetscFunctionReturn(0); 1406acdf5bf4SSatish Balay } 1407acdf5bf4SSatish Balay 1408b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1409acdf5bf4SSatish Balay { 1410acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1411d64ed03dSBarry Smith 1412d64ed03dSBarry Smith PetscFunctionBegin; 14139ace16cdSJacob Faibussowitsch PetscAssertFalse(!baij->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1414acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 14153a40ed3dSBarry Smith PetscFunctionReturn(0); 1416acdf5bf4SSatish Balay } 1417acdf5bf4SSatish Balay 1418dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 141958667388SSatish Balay { 142058667388SSatish Balay 
Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1421dfbe8321SBarry Smith PetscErrorCode ierr; 1422d64ed03dSBarry Smith 1423d64ed03dSBarry Smith PetscFunctionBegin; 142458667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 142558667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 14263a40ed3dSBarry Smith PetscFunctionReturn(0); 142758667388SSatish Balay } 14280ac07820SSatish Balay 1429dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 14300ac07820SSatish Balay { 14314e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 14324e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1433dfbe8321SBarry Smith PetscErrorCode ierr; 14343966268fSBarry Smith PetscLogDouble isend[5],irecv[5]; 14350ac07820SSatish Balay 1436d64ed03dSBarry Smith PetscFunctionBegin; 1437d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 143826fbe8dcSKarl Rupp 14394e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 144026fbe8dcSKarl Rupp 14410e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1442de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 144326fbe8dcSKarl Rupp 14444e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 144526fbe8dcSKarl Rupp 14460e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1447de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 144826fbe8dcSKarl Rupp 14490ac07820SSatish Balay if (flag == MAT_LOCAL) { 14504e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 14514e220ebcSLois Curfman McInnes info->nz_allocated = isend[1]; 14524e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 14534e220ebcSLois Curfman McInnes info->memory = isend[3]; 14544e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 14550ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 
1456820f2d46SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 145726fbe8dcSKarl Rupp 14584e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14594e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14604e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14614e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14624e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 14630ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1464820f2d46SBarry Smith ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr); 146526fbe8dcSKarl Rupp 14664e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 14674e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 14684e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 14694e220ebcSLois Curfman McInnes info->memory = irecv[3]; 14704e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 147198921bdaSJacob Faibussowitsch } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 14724e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 14734e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 14744e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 14753a40ed3dSBarry Smith PetscFunctionReturn(0); 14760ac07820SSatish Balay } 14770ac07820SSatish Balay 1478ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 147958667388SSatish Balay { 148058667388SSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1481dfbe8321SBarry Smith PetscErrorCode ierr; 148258667388SSatish Balay 1483d64ed03dSBarry Smith PetscFunctionBegin; 148412c028f9SKris Buschelman switch (op) { 1485512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 148612c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 148728b2fa4aSMatthew Knepley case 
MAT_UNUSED_NONZERO_LOCATION_ERR: 1488a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 148912c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 149043674050SBarry Smith MatCheckPreallocated(A,1); 14914e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 14924e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 149312c028f9SKris Buschelman break; 149412c028f9SKris Buschelman case MAT_ROW_ORIENTED: 149543674050SBarry Smith MatCheckPreallocated(A,1); 14964e0d8c25SBarry Smith a->roworiented = flg; 149726fbe8dcSKarl Rupp 14984e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 14994e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 150012c028f9SKris Buschelman break; 15018c78258cSHong Zhang case MAT_FORCE_DIAGONAL_ENTRIES: 1502071fcb05SBarry Smith case MAT_SORTED_FULL: 15037d3de750SJacob Faibussowitsch ierr = PetscInfo(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 150412c028f9SKris Buschelman break; 150512c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15064e0d8c25SBarry Smith a->donotstash = flg; 150712c028f9SKris Buschelman break; 150812c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 15094e0d8c25SBarry Smith a->ht_flag = flg; 1510abf3b562SBarry Smith a->ht_fact = 1.39; 151112c028f9SKris Buschelman break; 151277e54ba9SKris Buschelman case MAT_SYMMETRIC: 151377e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 15142188ac68SBarry Smith case MAT_HERMITIAN: 1515c10200c1SHong Zhang case MAT_SUBMAT_SINGLEIS: 15162188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 151743674050SBarry Smith MatCheckPreallocated(A,1); 15184e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 151977e54ba9SKris Buschelman break; 152012c028f9SKris Buschelman default: 152198921bdaSJacob Faibussowitsch SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1522d64ed03dSBarry Smith } 15233a40ed3dSBarry Smith PetscFunctionReturn(0); 152458667388SSatish Balay } 
152558667388SSatish Balay 1526fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 15270ac07820SSatish Balay { 15280ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 15290ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 15300ac07820SSatish Balay Mat B; 1531dfbe8321SBarry Smith PetscErrorCode ierr; 1532d0f46423SBarry Smith PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1533d0f46423SBarry Smith PetscInt bs=A->rmap->bs,mbs=baij->mbs; 15343eda8832SBarry Smith MatScalar *a; 15350ac07820SSatish Balay 1536d64ed03dSBarry Smith PetscFunctionBegin; 1537cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1538ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1539d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 15407adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 15412e72b8d9SBarry Smith /* Do not know preallocation information, but must set block size */ 15420298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1543fc4dec0aSBarry Smith } else { 1544fc4dec0aSBarry Smith B = *matout; 1545fc4dec0aSBarry Smith } 15460ac07820SSatish Balay 15470ac07820SSatish Balay /* copy over the A part */ 15480ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 15490ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1550785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 15510ac07820SSatish Balay 15520ac07820SSatish Balay for (i=0; i<mbs; i++) { 1553899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 155426fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 15550ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1556899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 15570ac07820SSatish Balay for (k=0; k<bs; k++) { 155897e5c40aSBarry Smith ierr = 
MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 155926fbe8dcSKarl Rupp 15600ac07820SSatish Balay col++; a += bs; 15610ac07820SSatish Balay } 15620ac07820SSatish Balay } 15630ac07820SSatish Balay } 15640ac07820SSatish Balay /* copy over the B part */ 15650ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 15660ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 15670ac07820SSatish Balay for (i=0; i<mbs; i++) { 1568899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 156926fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 15700ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 15710ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 15720ac07820SSatish Balay for (k=0; k<bs; k++) { 157397e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 157426fbe8dcSKarl Rupp col++; 157526fbe8dcSKarl Rupp a += bs; 15760ac07820SSatish Balay } 15770ac07820SSatish Balay } 15780ac07820SSatish Balay } 1579606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 15800ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15810ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15820ac07820SSatish Balay 1583cf37664fSBarry Smith if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 158426fbe8dcSKarl Rupp else { 158528be2f97SBarry Smith ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 15860ac07820SSatish Balay } 15873a40ed3dSBarry Smith PetscFunctionReturn(0); 15880ac07820SSatish Balay } 15890e95ebc0SSatish Balay 1590dfbe8321SBarry Smith PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 15910e95ebc0SSatish Balay { 159236c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 159336c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1594dfbe8321SBarry Smith PetscErrorCode ierr; 1595b24ad042SBarry Smith PetscInt s1,s2,s3; 15960e95ebc0SSatish Balay 1597d64ed03dSBarry Smith PetscFunctionBegin; 
159836c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 159936c4a09eSSatish Balay if (rr) { 160036c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 16019ace16cdSJacob Faibussowitsch PetscAssertFalse(s1!=s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 160236c4a09eSSatish Balay /* Overlap communication with computation. */ 1603ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 160436c4a09eSSatish Balay } 16050e95ebc0SSatish Balay if (ll) { 16060e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 16079ace16cdSJacob Faibussowitsch PetscAssertFalse(s1!=s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 16080298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 16090e95ebc0SSatish Balay } 161036c4a09eSSatish Balay /* scale the diagonal block */ 161136c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 161236c4a09eSSatish Balay 161336c4a09eSSatish Balay if (rr) { 161436c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1615ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 16160298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 161736c4a09eSSatish Balay } 16183a40ed3dSBarry Smith PetscFunctionReturn(0); 16190e95ebc0SSatish Balay } 16200e95ebc0SSatish Balay 16212b40b63fSBarry Smith PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 16220ac07820SSatish Balay { 16230ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 162465a92638SMatthew G. 
Knepley PetscInt *lrows; 16256e520ac8SStefano Zampini PetscInt r, len; 162694342113SStefano Zampini PetscBool cong; 16276849ba73SBarry Smith PetscErrorCode ierr; 16280ac07820SSatish Balay 1629d64ed03dSBarry Smith PetscFunctionBegin; 16306e520ac8SStefano Zampini /* get locally owned rows */ 16316e520ac8SStefano Zampini ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 163297b48c8fSBarry Smith /* fix right hand side if needed */ 163397b48c8fSBarry Smith if (x && b) { 163465a92638SMatthew G. Knepley const PetscScalar *xx; 163565a92638SMatthew G. Knepley PetscScalar *bb; 163665a92638SMatthew G. Knepley 163797b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 163897b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 163965a92638SMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 164097b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 164197b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 164297b48c8fSBarry Smith } 164397b48c8fSBarry Smith 16440ac07820SSatish Balay /* actually zap the local rows */ 164572dacd9aSBarry Smith /* 164672dacd9aSBarry Smith Zero the required rows. If the "diagonal block" of the matrix 1647a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 164872dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 164972dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 
165072dacd9aSBarry Smith 165172dacd9aSBarry Smith */ 16529c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1653a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 165494342113SStefano Zampini ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 165594342113SStefano Zampini if ((diag != 0.0) && cong) { 1656a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1657f4df32b1SMatthew Knepley } else if (diag != 0.0) { 1658f4259b30SLisandro Dalcin ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 16599ace16cdSJacob Faibussowitsch PetscAssertFalse(((Mat_SeqBAIJ*)l->A->data)->nonew,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1660512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 166165a92638SMatthew G. Knepley for (r = 0; r < len; ++r) { 166265a92638SMatthew G. 
Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 1663f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1664a07cd24cSSatish Balay } 1665a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1666a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16679c957beeSSatish Balay } else { 1668a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1669a07cd24cSSatish Balay } 1670606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 16714f9cfa9eSBarry Smith 16724f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 16734f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1674e56f5c9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1675820f2d46SBarry Smith ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 1676e56f5c9eSBarry Smith } 16773a40ed3dSBarry Smith PetscFunctionReturn(0); 16780ac07820SSatish Balay } 167972dacd9aSBarry Smith 16806f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 16816f0a72daSMatthew G. Knepley { 16826f0a72daSMatthew G. Knepley Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 16836f0a72daSMatthew G. Knepley PetscErrorCode ierr; 1684131c27b5Sprj- PetscMPIInt n = A->rmap->n,p = 0; 1685131c27b5Sprj- PetscInt i,j,k,r,len = 0,row,col,count; 16866f0a72daSMatthew G. Knepley PetscInt *lrows,*owners = A->rmap->range; 16876f0a72daSMatthew G. Knepley PetscSFNode *rrows; 16886f0a72daSMatthew G. Knepley PetscSF sf; 16896f0a72daSMatthew G. Knepley const PetscScalar *xx; 16906f0a72daSMatthew G. Knepley PetscScalar *bb,*mask; 16916f0a72daSMatthew G. Knepley Vec xmask,lmask; 16926f0a72daSMatthew G. Knepley Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 16936f0a72daSMatthew G. 
Knepley PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 16946f0a72daSMatthew G. Knepley PetscScalar *aa; 16956f0a72daSMatthew G. Knepley 16966f0a72daSMatthew G. Knepley PetscFunctionBegin; 16976f0a72daSMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 16986f0a72daSMatthew G. Knepley ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 16996f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 17006f0a72daSMatthew G. Knepley ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 17016f0a72daSMatthew G. Knepley for (r = 0; r < N; ++r) { 17026f0a72daSMatthew G. Knepley const PetscInt idx = rows[r]; 17039ace16cdSJacob Faibussowitsch PetscAssertFalse(idx < 0 || A->rmap->N <= idx,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")",idx,A->rmap->N); 17045ba17502SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 17055ba17502SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 17065ba17502SJed Brown } 17076f0a72daSMatthew G. Knepley rrows[r].rank = p; 17086f0a72daSMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 17096f0a72daSMatthew G. Knepley } 17106f0a72daSMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 17116f0a72daSMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 17126f0a72daSMatthew G. Knepley /* Collect flags for rows to be zeroed */ 17136f0a72daSMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 17146f0a72daSMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 17156f0a72daSMatthew G. Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 17166f0a72daSMatthew G. Knepley /* Compress and put in row numbers */ 17176f0a72daSMatthew G. 
Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 17186f0a72daSMatthew G. Knepley /* zero diagonal part of matrix */ 17196f0a72daSMatthew G. Knepley ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 17206f0a72daSMatthew G. Knepley /* handle off diagonal part of matrix */ 17212a7a6963SBarry Smith ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 17226f0a72daSMatthew G. Knepley ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 17236f0a72daSMatthew G. Knepley ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 17246f0a72daSMatthew G. Knepley for (i=0; i<len; i++) bb[lrows[i]] = 1; 17256f0a72daSMatthew G. Knepley ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 17266f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17276f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17286f0a72daSMatthew G. Knepley ierr = VecDestroy(&xmask);CHKERRQ(ierr); 17296f0a72daSMatthew G. Knepley if (x) { 17306f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17316f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17326f0a72daSMatthew G. Knepley ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 17336f0a72daSMatthew G. Knepley ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 17346f0a72daSMatthew G. Knepley } 17356f0a72daSMatthew G. Knepley ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 17366f0a72daSMatthew G. Knepley /* remove zeroed rows of off diagonal matrix */ 17376f0a72daSMatthew G. Knepley for (i = 0; i < len; ++i) { 17386f0a72daSMatthew G. Knepley row = lrows[i]; 17396f0a72daSMatthew G. Knepley count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 17406f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 17416f0a72daSMatthew G. 
Knepley for (k = 0; k < count; ++k) { 17426f0a72daSMatthew G. Knepley aa[0] = 0.0; 17436f0a72daSMatthew G. Knepley aa += bs; 17446f0a72daSMatthew G. Knepley } 17456f0a72daSMatthew G. Knepley } 17466f0a72daSMatthew G. Knepley /* loop over all elements of off process part of matrix zeroing removed columns*/ 17476f0a72daSMatthew G. Knepley for (i = 0; i < l->B->rmap->N; ++i) { 17486f0a72daSMatthew G. Knepley row = i/bs; 17496f0a72daSMatthew G. Knepley for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 17506f0a72daSMatthew G. Knepley for (k = 0; k < bs; ++k) { 17516f0a72daSMatthew G. Knepley col = bs*baij->j[j] + k; 17526f0a72daSMatthew G. Knepley if (PetscAbsScalar(mask[col])) { 17536f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 175489ae1891SBarry Smith if (x) bb[i] -= aa[0]*xx[col]; 17556f0a72daSMatthew G. Knepley aa[0] = 0.0; 17566f0a72daSMatthew G. Knepley } 17576f0a72daSMatthew G. Knepley } 17586f0a72daSMatthew G. Knepley } 17596f0a72daSMatthew G. Knepley } 17606f0a72daSMatthew G. Knepley if (x) { 17616f0a72daSMatthew G. Knepley ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 17626f0a72daSMatthew G. Knepley ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 17636f0a72daSMatthew G. Knepley } 17646f0a72daSMatthew G. Knepley ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 17656f0a72daSMatthew G. Knepley ierr = VecDestroy(&lmask);CHKERRQ(ierr); 17666f0a72daSMatthew G. Knepley ierr = PetscFree(lrows);CHKERRQ(ierr); 17674f9cfa9eSBarry Smith 17684f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 17694f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 17704f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1771820f2d46SBarry Smith ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 17724f9cfa9eSBarry Smith } 17736f0a72daSMatthew G. 
Knepley PetscFunctionReturn(0); 17746f0a72daSMatthew G. Knepley } 17756f0a72daSMatthew G. Knepley 1776dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1777bb5a7306SBarry Smith { 1778bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1779dfbe8321SBarry Smith PetscErrorCode ierr; 1780d64ed03dSBarry Smith 1781d64ed03dSBarry Smith PetscFunctionBegin; 1782bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 17833a40ed3dSBarry Smith PetscFunctionReturn(0); 1784bb5a7306SBarry Smith } 1785bb5a7306SBarry Smith 17866849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 17870ac07820SSatish Balay 1788ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 17897fc3c18eSBarry Smith { 17907fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 17917fc3c18eSBarry Smith Mat a,b,c,d; 1792ace3abfcSBarry Smith PetscBool flg; 1793dfbe8321SBarry Smith PetscErrorCode ierr; 17947fc3c18eSBarry Smith 17957fc3c18eSBarry Smith PetscFunctionBegin; 17967fc3c18eSBarry Smith a = matA->A; b = matA->B; 17977fc3c18eSBarry Smith c = matB->A; d = matB->B; 17987fc3c18eSBarry Smith 17997fc3c18eSBarry Smith ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 1800abc0a331SBarry Smith if (flg) { 18017fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 18027fc3c18eSBarry Smith } 1803820f2d46SBarry Smith ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 18047fc3c18eSBarry Smith PetscFunctionReturn(0); 18057fc3c18eSBarry Smith } 18067fc3c18eSBarry Smith 18073c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 18083c896bc6SHong Zhang { 18093c896bc6SHong Zhang PetscErrorCode ierr; 18103c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 18113c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 18123c896bc6SHong Zhang 18133c896bc6SHong Zhang PetscFunctionBegin; 18143c896bc6SHong 
Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 18153c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 18163c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 18173c896bc6SHong Zhang } else { 18183c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 18193c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 18203c896bc6SHong Zhang } 1821cdc753b6SBarry Smith ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 18223c896bc6SHong Zhang PetscFunctionReturn(0); 18233c896bc6SHong Zhang } 1824273d9f13SBarry Smith 18254994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1826273d9f13SBarry Smith { 1827dfbe8321SBarry Smith PetscErrorCode ierr; 1828273d9f13SBarry Smith 1829273d9f13SBarry Smith PetscFunctionBegin; 1830f4259b30SLisandro Dalcin ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 1831273d9f13SBarry Smith PetscFunctionReturn(0); 1832273d9f13SBarry Smith } 1833273d9f13SBarry Smith 18344de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 18354de5dceeSHong Zhang { 1836001ddc4fSHong Zhang PetscErrorCode ierr; 1837001ddc4fSHong Zhang PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 18384de5dceeSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 18394de5dceeSHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 18404de5dceeSHong Zhang 18414de5dceeSHong Zhang PetscFunctionBegin; 1842001ddc4fSHong Zhang ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 18434de5dceeSHong Zhang PetscFunctionReturn(0); 18444de5dceeSHong Zhang } 18454de5dceeSHong Zhang 18464fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 18474fe895cdSHong Zhang { 18484fe895cdSHong Zhang PetscErrorCode ierr; 18494fe895cdSHong Zhang Mat_MPIBAIJ 
*xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 18504fe895cdSHong Zhang PetscBLASInt bnz,one=1; 18514fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 1852b31f67cfSBarry Smith PetscInt bs2 = Y->rmap->bs*Y->rmap->bs; 18534fe895cdSHong Zhang 18544fe895cdSHong Zhang PetscFunctionBegin; 18554fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 18564fe895cdSHong Zhang PetscScalar alpha = a; 18574fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->A->data; 18584fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->A->data; 1859b31f67cfSBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 18608b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 18614fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->B->data; 18624fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->B->data; 1863b31f67cfSBarry Smith ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 18648b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 1865a3fa217bSJose E. Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 1866ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1867ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 18684fe895cdSHong Zhang } else { 18694de5dceeSHong Zhang Mat B; 18704de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 18714de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 18724de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 18734de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 18744de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 18754de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 18764de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 18774de5dceeSHong Zhang ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 18784de5dceeSHong Zhang ierr = 
MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 18794de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 18804de5dceeSHong Zhang ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 18814de5dceeSHong Zhang /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 18824de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 188379c2fd05SStefano Zampini ierr = MatHeaderMerge(Y,&B);CHKERRQ(ierr); 18844de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 18854de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 18864fe895cdSHong Zhang } 18874fe895cdSHong Zhang PetscFunctionReturn(0); 18884fe895cdSHong Zhang } 18894fe895cdSHong Zhang 18902726fb6dSPierre Jolivet PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 18912726fb6dSPierre Jolivet { 18922726fb6dSPierre Jolivet #if defined(PETSC_USE_COMPLEX) 18932726fb6dSPierre Jolivet PetscErrorCode ierr; 18942726fb6dSPierre Jolivet Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 18952726fb6dSPierre Jolivet 18962726fb6dSPierre Jolivet PetscFunctionBegin; 18972726fb6dSPierre Jolivet ierr = MatConjugate_SeqBAIJ(a->A);CHKERRQ(ierr); 18982726fb6dSPierre Jolivet ierr = MatConjugate_SeqBAIJ(a->B);CHKERRQ(ierr); 18992726fb6dSPierre Jolivet #else 19002726fb6dSPierre Jolivet PetscFunctionBegin; 19012726fb6dSPierre Jolivet #endif 19022726fb6dSPierre Jolivet PetscFunctionReturn(0); 19032726fb6dSPierre Jolivet } 19042726fb6dSPierre Jolivet 190599cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 190699cafbc1SBarry Smith { 190799cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 190899cafbc1SBarry Smith PetscErrorCode ierr; 190999cafbc1SBarry Smith 191099cafbc1SBarry Smith PetscFunctionBegin; 191199cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 191299cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 191399cafbc1SBarry Smith 
PetscFunctionReturn(0); 191499cafbc1SBarry Smith } 191599cafbc1SBarry Smith 191699cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 191799cafbc1SBarry Smith { 191899cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 191999cafbc1SBarry Smith PetscErrorCode ierr; 192099cafbc1SBarry Smith 192199cafbc1SBarry Smith PetscFunctionBegin; 192299cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 192399cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 192499cafbc1SBarry Smith PetscFunctionReturn(0); 192599cafbc1SBarry Smith } 192699cafbc1SBarry Smith 19277dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 19284aa3045dSJed Brown { 19294aa3045dSJed Brown PetscErrorCode ierr; 19304aa3045dSJed Brown IS iscol_local; 19314aa3045dSJed Brown PetscInt csize; 19324aa3045dSJed Brown 19334aa3045dSJed Brown PetscFunctionBegin; 19344aa3045dSJed Brown ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 1935b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 1936b79d0421SJed Brown ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 19379ace16cdSJacob Faibussowitsch PetscAssertFalse(!iscol_local,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 1938b79d0421SJed Brown } else { 19394aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 1940b79d0421SJed Brown } 19417dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 1942b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 1943b79d0421SJed Brown ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 19446bf464f9SBarry Smith ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 1945b79d0421SJed Brown } 19464aa3045dSJed Brown PetscFunctionReturn(0); 19474aa3045dSJed Brown } 194817df9f7cSHong Zhang 194982094794SBarry 
Smith /* 195082094794SBarry Smith Not great since it makes two copies of the submatrix, first an SeqBAIJ 195182094794SBarry Smith in local and then by concatenating the local matrices the end result. 19527dae84e0SHong Zhang Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 19538f46ffcaSHong Zhang This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 195482094794SBarry Smith */ 19557dae84e0SHong Zhang PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 195682094794SBarry Smith { 195782094794SBarry Smith PetscErrorCode ierr; 195882094794SBarry Smith PetscMPIInt rank,size; 195982094794SBarry Smith PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 1960c9ffca76SHong Zhang PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 196129dcf524SDmitry Karpeev Mat M,Mreuse; 196282094794SBarry Smith MatScalar *vwork,*aa; 1963ce94432eSBarry Smith MPI_Comm comm; 196429dcf524SDmitry Karpeev IS isrow_new, iscol_new; 196582094794SBarry Smith Mat_SeqBAIJ *aij; 196682094794SBarry Smith 196782094794SBarry Smith PetscFunctionBegin; 1968ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 1969ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 1970ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 197129dcf524SDmitry Karpeev /* The compression and expansion should be avoided. 
Doesn't point 197229dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 197329dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 197429dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 197582094794SBarry Smith 197682094794SBarry Smith if (call == MAT_REUSE_MATRIX) { 197782094794SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 19789ace16cdSJacob Faibussowitsch PetscAssertFalse(!Mreuse,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 19797dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr); 198082094794SBarry Smith } else { 19817dae84e0SHong Zhang ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr); 198282094794SBarry Smith } 198329dcf524SDmitry Karpeev ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 198429dcf524SDmitry Karpeev ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 198582094794SBarry Smith /* 198682094794SBarry Smith m - number of local rows 198782094794SBarry Smith n - number of columns (same on all processors) 198882094794SBarry Smith rstart - first row in new global matrix generated 198982094794SBarry Smith */ 199082094794SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 199182094794SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 199282094794SBarry Smith m = m/bs; 199382094794SBarry Smith n = n/bs; 199482094794SBarry Smith 199582094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 199682094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 199782094794SBarry Smith ii = aij->i; 199882094794SBarry Smith jj = aij->j; 199982094794SBarry Smith 200082094794SBarry Smith /* 200182094794SBarry Smith Determine the number of 
non-zeros in the diagonal and off-diagonal 200282094794SBarry Smith portions of the matrix in order to do correct preallocation 200382094794SBarry Smith */ 200482094794SBarry Smith 200582094794SBarry Smith /* first get start and end of "diagonal" columns */ 200682094794SBarry Smith if (csize == PETSC_DECIDE) { 200782094794SBarry Smith ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 200882094794SBarry Smith if (mglobal == n*bs) { /* square matrix */ 200982094794SBarry Smith nlocal = m; 201082094794SBarry Smith } else { 201182094794SBarry Smith nlocal = n/size + ((n % size) > rank); 201282094794SBarry Smith } 201382094794SBarry Smith } else { 201482094794SBarry Smith nlocal = csize/bs; 201582094794SBarry Smith } 201655b25c41SPierre Jolivet ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr); 201782094794SBarry Smith rstart = rend - nlocal; 20189ace16cdSJacob Faibussowitsch PetscAssertFalse(rank == size - 1 && rend != n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT,rend,n); 201982094794SBarry Smith 202082094794SBarry Smith /* next, compute all the lengths */ 2021dcca6d9dSJed Brown ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 202282094794SBarry Smith for (i=0; i<m; i++) { 202382094794SBarry Smith jend = ii[i+1] - ii[i]; 202482094794SBarry Smith olen = 0; 202582094794SBarry Smith dlen = 0; 202682094794SBarry Smith for (j=0; j<jend; j++) { 202782094794SBarry Smith if (*jj < rstart || *jj >= rend) olen++; 202882094794SBarry Smith else dlen++; 202982094794SBarry Smith jj++; 203082094794SBarry Smith } 203182094794SBarry Smith olens[i] = olen; 203282094794SBarry Smith dlens[i] = dlen; 203382094794SBarry Smith } 203482094794SBarry Smith ierr = MatCreate(comm,&M);CHKERRQ(ierr); 203582094794SBarry Smith ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 203682094794SBarry Smith ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 
203782094794SBarry Smith ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 20388f46ffcaSHong Zhang ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2039eb9baa12SBarry Smith ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 204082094794SBarry Smith } else { 204182094794SBarry Smith PetscInt ml,nl; 204282094794SBarry Smith 204382094794SBarry Smith M = *newmat; 204482094794SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 20459ace16cdSJacob Faibussowitsch PetscAssertFalse(ml != m,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 204682094794SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 204782094794SBarry Smith /* 204882094794SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 204982094794SBarry Smith rather than the slower MatSetValues(). 205082094794SBarry Smith */ 205182094794SBarry Smith M->was_assembled = PETSC_TRUE; 205282094794SBarry Smith M->assembled = PETSC_FALSE; 205382094794SBarry Smith } 205482094794SBarry Smith ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 205582094794SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 205682094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 205782094794SBarry Smith ii = aij->i; 205882094794SBarry Smith jj = aij->j; 205982094794SBarry Smith aa = aij->a; 206082094794SBarry Smith for (i=0; i<m; i++) { 206182094794SBarry Smith row = rstart/bs + i; 206282094794SBarry Smith nz = ii[i+1] - ii[i]; 206382094794SBarry Smith cwork = jj; jj += nz; 206475f6568bSJed Brown vwork = aa; aa += nz*bs*bs; 206582094794SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 206682094794SBarry Smith } 206782094794SBarry Smith 206882094794SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 206982094794SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 207082094794SBarry 
Smith *newmat = M; 207182094794SBarry Smith 207282094794SBarry Smith /* save submatrix used in processor for next request */ 207382094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 207482094794SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 207582094794SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 207682094794SBarry Smith } 207782094794SBarry Smith PetscFunctionReturn(0); 207882094794SBarry Smith } 207982094794SBarry Smith 208082094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 208182094794SBarry Smith { 208282094794SBarry Smith MPI_Comm comm,pcomm; 2083a0a83eb5SRémi Lacroix PetscInt clocal_size,nrows; 208482094794SBarry Smith const PetscInt *rows; 2085dbf0e21dSBarry Smith PetscMPIInt size; 2086a0a83eb5SRémi Lacroix IS crowp,lcolp; 208782094794SBarry Smith PetscErrorCode ierr; 208882094794SBarry Smith 208982094794SBarry Smith PetscFunctionBegin; 209082094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 209182094794SBarry Smith /* make a collective version of 'rowp' */ 209282094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 209382094794SBarry Smith if (pcomm==comm) { 209482094794SBarry Smith crowp = rowp; 209582094794SBarry Smith } else { 209682094794SBarry Smith ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 209782094794SBarry Smith ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 209870b3c8c7SBarry Smith ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 209982094794SBarry Smith ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 210082094794SBarry Smith } 2101a0a83eb5SRémi Lacroix ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2102a0a83eb5SRémi Lacroix /* make a local version of 'colp' */ 210382094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2104ffc4695bSBarry Smith ierr = MPI_Comm_size(pcomm,&size);CHKERRMPI(ierr); 
2105dbf0e21dSBarry Smith if (size==1) { 210682094794SBarry Smith lcolp = colp; 210782094794SBarry Smith } else { 210875f6568bSJed Brown ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 210982094794SBarry Smith } 2110dbf0e21dSBarry Smith ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 211175f6568bSJed Brown /* now we just get the submatrix */ 21127afc1a8bSJed Brown ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 21137dae84e0SHong Zhang ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2114a0a83eb5SRémi Lacroix /* clean up */ 2115a0a83eb5SRémi Lacroix if (pcomm!=comm) { 2116a0a83eb5SRémi Lacroix ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2117a0a83eb5SRémi Lacroix } 2118dbf0e21dSBarry Smith if (size>1) { 21196bf464f9SBarry Smith ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 212082094794SBarry Smith } 212182094794SBarry Smith PetscFunctionReturn(0); 212282094794SBarry Smith } 212382094794SBarry Smith 21247087cfbeSBarry Smith PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 21258c7482ecSBarry Smith { 21268c7482ecSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 21278c7482ecSBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 21288c7482ecSBarry Smith 21298c7482ecSBarry Smith PetscFunctionBegin; 213026fbe8dcSKarl Rupp if (nghosts) *nghosts = B->nbs; 213126fbe8dcSKarl Rupp if (ghosts) *ghosts = baij->garray; 21328c7482ecSBarry Smith PetscFunctionReturn(0); 21338c7482ecSBarry Smith } 21348c7482ecSBarry Smith 2135d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2136f6d58c54SBarry Smith { 2137f6d58c54SBarry Smith Mat B; 2138f6d58c54SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2139f6d58c54SBarry Smith Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2140f6d58c54SBarry Smith Mat_SeqAIJ *b; 2141f6d58c54SBarry Smith PetscErrorCode ierr; 2142f4259b30SLisandro Dalcin PetscMPIInt size,rank,*recvcounts = 
NULL,*displs = NULL; 2143f6d58c54SBarry Smith PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2144f6d58c54SBarry Smith PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2145f6d58c54SBarry Smith 2146f6d58c54SBarry Smith PetscFunctionBegin; 2147ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 2148ffc4695bSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr); 2149f6d58c54SBarry Smith 2150f6d58c54SBarry Smith /* ---------------------------------------------------------------- 2151f6d58c54SBarry Smith Tell every processor the number of nonzeros per row 2152f6d58c54SBarry Smith */ 2153854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2154f6d58c54SBarry Smith for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2155f6d58c54SBarry Smith lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2156f6d58c54SBarry Smith } 2157785e854fSJed Brown ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2158f6d58c54SBarry Smith displs = recvcounts + size; 2159f6d58c54SBarry Smith for (i=0; i<size; i++) { 2160f6d58c54SBarry Smith recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2161f6d58c54SBarry Smith displs[i] = A->rmap->range[i]/bs; 2162f6d58c54SBarry Smith } 2163f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2164ffc4695bSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2165f6d58c54SBarry Smith #else 21663d3eaba7SBarry Smith sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2167ffc4695bSBarry Smith ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2168f6d58c54SBarry Smith #endif 2169f6d58c54SBarry Smith /* 
--------------------------------------------------------------- 2170f6d58c54SBarry Smith Create the sequential matrix of the same type as the local block diagonal 2171f6d58c54SBarry Smith */ 2172f6d58c54SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2173f6d58c54SBarry Smith ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2174f6d58c54SBarry Smith ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2175f6d58c54SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2176f6d58c54SBarry Smith b = (Mat_SeqAIJ*)B->data; 2177f6d58c54SBarry Smith 2178f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2179f6d58c54SBarry Smith Copy my part of matrix column indices over 2180f6d58c54SBarry Smith */ 2181f6d58c54SBarry Smith sendcount = ad->nz + bd->nz; 2182f6d58c54SBarry Smith jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2183f6d58c54SBarry Smith a_jsendbuf = ad->j; 2184f6d58c54SBarry Smith b_jsendbuf = bd->j; 2185f6d58c54SBarry Smith n = A->rmap->rend/bs - A->rmap->rstart/bs; 2186f6d58c54SBarry Smith cnt = 0; 2187f6d58c54SBarry Smith for (i=0; i<n; i++) { 2188f6d58c54SBarry Smith 2189f6d58c54SBarry Smith /* put in lower diagonal portion */ 2190f6d58c54SBarry Smith m = bd->i[i+1] - bd->i[i]; 2191f6d58c54SBarry Smith while (m > 0) { 2192f6d58c54SBarry Smith /* is it above diagonal (in bd (compressed) numbering) */ 2193f6d58c54SBarry Smith if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2194f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2195f6d58c54SBarry Smith m--; 2196f6d58c54SBarry Smith } 2197f6d58c54SBarry Smith 2198f6d58c54SBarry Smith /* put in diagonal portion */ 2199f6d58c54SBarry Smith for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2200f6d58c54SBarry Smith jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2201f6d58c54SBarry Smith } 2202f6d58c54SBarry Smith 2203f6d58c54SBarry Smith /* put in upper diagonal portion */ 2204f6d58c54SBarry Smith 
while (m-- > 0) { 2205f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2206f6d58c54SBarry Smith } 2207f6d58c54SBarry Smith } 22089ace16cdSJacob Faibussowitsch PetscAssertFalse(cnt != sendcount,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT,sendcount,cnt); 2209f6d58c54SBarry Smith 2210f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2211f6d58c54SBarry Smith Gather all column indices to all processors 2212f6d58c54SBarry Smith */ 2213f6d58c54SBarry Smith for (i=0; i<size; i++) { 2214f6d58c54SBarry Smith recvcounts[i] = 0; 2215f6d58c54SBarry Smith for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2216f6d58c54SBarry Smith recvcounts[i] += lens[j]; 2217f6d58c54SBarry Smith } 2218f6d58c54SBarry Smith } 2219f6d58c54SBarry Smith displs[0] = 0; 2220f6d58c54SBarry Smith for (i=1; i<size; i++) { 2221f6d58c54SBarry Smith displs[i] = displs[i-1] + recvcounts[i-1]; 2222f6d58c54SBarry Smith } 2223f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2224ffc4695bSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2225f6d58c54SBarry Smith #else 2226ffc4695bSBarry Smith ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 2227f6d58c54SBarry Smith #endif 2228f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2229f6d58c54SBarry Smith Assemble the matrix into useable form (note numerical values not yet set) 2230f6d58c54SBarry Smith */ 2231f6d58c54SBarry Smith /* set the b->ilen (length of each row) values */ 2232580bdb30SBarry Smith ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr); 2233f6d58c54SBarry Smith /* set the b->i indices */ 2234f6d58c54SBarry Smith b->i[0] = 0; 2235f6d58c54SBarry Smith for (i=1; i<=A->rmap->N/bs; i++) 
{ 2236f6d58c54SBarry Smith b->i[i] = b->i[i-1] + lens[i-1]; 2237f6d58c54SBarry Smith } 2238f6d58c54SBarry Smith ierr = PetscFree(lens);CHKERRQ(ierr); 2239f6d58c54SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2240f6d58c54SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2241f6d58c54SBarry Smith ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2242f6d58c54SBarry Smith 2243f6d58c54SBarry Smith if (A->symmetric) { 2244f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2245f6d58c54SBarry Smith } else if (A->hermitian) { 2246f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2247f6d58c54SBarry Smith } else if (A->structurally_symmetric) { 2248f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2249f6d58c54SBarry Smith } 2250f6d58c54SBarry Smith *newmat = B; 2251f6d58c54SBarry Smith PetscFunctionReturn(0); 2252f6d58c54SBarry Smith } 2253f6d58c54SBarry Smith 2254b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2255b1a666ecSBarry Smith { 2256b1a666ecSBarry Smith Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2257b1a666ecSBarry Smith PetscErrorCode ierr; 2258f4259b30SLisandro Dalcin Vec bb1 = NULL; 2259b1a666ecSBarry Smith 2260b1a666ecSBarry Smith PetscFunctionBegin; 2261b1a666ecSBarry Smith if (flag == SOR_APPLY_UPPER) { 2262b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2263b1a666ecSBarry Smith PetscFunctionReturn(0); 2264b1a666ecSBarry Smith } 2265b1a666ecSBarry Smith 22664e980039SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 22674e980039SJed Brown ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 22684e980039SJed Brown } 22694e980039SJed Brown 2270b1a666ecSBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2271b1a666ecSBarry 
Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2272b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2273b1a666ecSBarry Smith its--; 2274b1a666ecSBarry Smith } 2275b1a666ecSBarry Smith 2276b1a666ecSBarry Smith while (its--) { 2277b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2278b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2279b1a666ecSBarry Smith 2280b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2281b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2282b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2283b1a666ecSBarry Smith 2284b1a666ecSBarry Smith /* local sweep */ 2285b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2286b1a666ecSBarry Smith } 2287b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2288b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2289b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2290b1a666ecSBarry Smith its--; 2291b1a666ecSBarry Smith } 2292b1a666ecSBarry Smith while (its--) { 2293b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2294b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2295b1a666ecSBarry Smith 2296b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2297b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2298b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2299b1a666ecSBarry Smith 2300b1a666ecSBarry Smith /* local sweep */ 2301b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 
2302b1a666ecSBarry Smith } 2303b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2304b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2305b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2306b1a666ecSBarry Smith its--; 2307b1a666ecSBarry Smith } 2308b1a666ecSBarry Smith while (its--) { 2309b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2310b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2311b1a666ecSBarry Smith 2312b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2313b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2314b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2315b1a666ecSBarry Smith 2316b1a666ecSBarry Smith /* local sweep */ 2317b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2318b1a666ecSBarry Smith } 2319ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2320b1a666ecSBarry Smith 23216bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2322b1a666ecSBarry Smith PetscFunctionReturn(0); 2323b1a666ecSBarry Smith } 2324b1a666ecSBarry Smith 2325857cbf51SRichard Tran Mills PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A,PetscInt type,PetscReal *reductions) 232647f7623dSRémi Lacroix { 232747f7623dSRémi Lacroix PetscErrorCode ierr; 232847f7623dSRémi Lacroix Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 2329a873a8cdSSam Reynolds PetscInt m,N,i,*garray = aij->garray; 233047f7623dSRémi Lacroix PetscInt ib,jb,bs = A->rmap->bs; 233147f7623dSRémi Lacroix Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 233247f7623dSRémi Lacroix MatScalar *a_val = a_aij->a; 233347f7623dSRémi Lacroix Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 
233447f7623dSRémi Lacroix MatScalar *b_val = b_aij->a; 233547f7623dSRémi Lacroix PetscReal *work; 233647f7623dSRémi Lacroix 233747f7623dSRémi Lacroix PetscFunctionBegin; 2338a873a8cdSSam Reynolds ierr = MatGetSize(A,&m,&N);CHKERRQ(ierr); 23391795a4d1SJed Brown ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 2340857cbf51SRichard Tran Mills if (type == NORM_2) { 234147f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 234247f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 234347f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 234447f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 234547f7623dSRémi Lacroix a_val++; 234647f7623dSRémi Lacroix } 234747f7623dSRémi Lacroix } 234847f7623dSRémi Lacroix } 234947f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 235047f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 235147f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 235247f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 235347f7623dSRémi Lacroix b_val++; 235447f7623dSRémi Lacroix } 235547f7623dSRémi Lacroix } 235647f7623dSRémi Lacroix } 2357857cbf51SRichard Tran Mills } else if (type == NORM_1) { 235847f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 235947f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 236047f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 236147f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 236247f7623dSRémi Lacroix a_val++; 236347f7623dSRémi Lacroix } 236447f7623dSRémi Lacroix } 236547f7623dSRémi Lacroix } 236647f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 236747f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 236847f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 236947f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 237047f7623dSRémi Lacroix b_val++; 237147f7623dSRémi Lacroix } 
237247f7623dSRémi Lacroix } 237347f7623dSRémi Lacroix } 2374857cbf51SRichard Tran Mills } else if (type == NORM_INFINITY) { 237547f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 237647f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 237747f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 237847f7623dSRémi Lacroix int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 237947f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 238047f7623dSRémi Lacroix a_val++; 238147f7623dSRémi Lacroix } 238247f7623dSRémi Lacroix } 238347f7623dSRémi Lacroix } 238447f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 238547f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 238647f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 238747f7623dSRémi Lacroix int col = garray[b_aij->j[i]] * bs + jb; 238847f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 238947f7623dSRémi Lacroix b_val++; 239047f7623dSRémi Lacroix } 239147f7623dSRémi Lacroix } 239247f7623dSRémi Lacroix } 2393857cbf51SRichard Tran Mills } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2394a873a8cdSSam Reynolds for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2395a873a8cdSSam Reynolds for (jb=0; jb<bs; jb++) { 2396a873a8cdSSam Reynolds for (ib=0; ib<bs; ib++) { 2397857cbf51SRichard Tran Mills work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2398a873a8cdSSam Reynolds a_val++; 2399a873a8cdSSam Reynolds } 2400a873a8cdSSam Reynolds } 2401a873a8cdSSam Reynolds } 2402a873a8cdSSam Reynolds for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2403a873a8cdSSam Reynolds for (jb=0; jb<bs; jb++) { 2404a873a8cdSSam Reynolds for (ib=0; ib<bs; ib++) { 2405857cbf51SRichard Tran Mills work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2406a873a8cdSSam Reynolds b_val++; 2407a873a8cdSSam Reynolds } 2408a873a8cdSSam Reynolds } 2409a873a8cdSSam Reynolds } 2410857cbf51SRichard 
Tran Mills } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2411857cbf51SRichard Tran Mills for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 2412857cbf51SRichard Tran Mills for (jb=0; jb<bs; jb++) { 2413857cbf51SRichard Tran Mills for (ib=0; ib<bs; ib++) { 2414857cbf51SRichard Tran Mills work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2415857cbf51SRichard Tran Mills a_val++; 2416857cbf51SRichard Tran Mills } 2417857cbf51SRichard Tran Mills } 2418857cbf51SRichard Tran Mills } 2419857cbf51SRichard Tran Mills for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 2420857cbf51SRichard Tran Mills for (jb=0; jb<bs; jb++) { 2421857cbf51SRichard Tran Mills for (ib=0; ib<bs; ib++) { 2422857cbf51SRichard Tran Mills work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2423857cbf51SRichard Tran Mills b_val++; 2424857cbf51SRichard Tran Mills } 2425857cbf51SRichard Tran Mills } 2426857cbf51SRichard Tran Mills } 2427857cbf51SRichard Tran Mills } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown reduction type"); 2428857cbf51SRichard Tran Mills if (type == NORM_INFINITY) { 2429a873a8cdSSam Reynolds ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 243047f7623dSRémi Lacroix } else { 2431a873a8cdSSam Reynolds ierr = MPIU_Allreduce(work,reductions,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr); 243247f7623dSRémi Lacroix } 243347f7623dSRémi Lacroix ierr = PetscFree(work);CHKERRQ(ierr); 2434857cbf51SRichard Tran Mills if (type == NORM_2) { 2435a873a8cdSSam Reynolds for (i=0; i<N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2436857cbf51SRichard Tran Mills } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2437a873a8cdSSam Reynolds for (i=0; i<N; i++) reductions[i] /= m; 243847f7623dSRémi Lacroix } 243947f7623dSRémi Lacroix 
PetscFunctionReturn(0); 244047f7623dSRémi Lacroix } 244147f7623dSRémi Lacroix 2442713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2443bbead8a2SBarry Smith { 2444bbead8a2SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2445bbead8a2SBarry Smith PetscErrorCode ierr; 2446bbead8a2SBarry Smith 2447bbead8a2SBarry Smith PetscFunctionBegin; 2448bbead8a2SBarry Smith ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 24497b6c816cSBarry Smith A->factorerrortype = a->A->factorerrortype; 24507b6c816cSBarry Smith A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 24517b6c816cSBarry Smith A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2452bbead8a2SBarry Smith PetscFunctionReturn(0); 2453bbead8a2SBarry Smith } 2454bbead8a2SBarry Smith 24557d68702bSBarry Smith PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a) 24567d68702bSBarry Smith { 24577d68702bSBarry Smith PetscErrorCode ierr; 24587d68702bSBarry Smith Mat_MPIBAIJ *maij = (Mat_MPIBAIJ*)Y->data; 24596f33a894SBarry Smith Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)maij->A->data; 24607d68702bSBarry Smith 24617d68702bSBarry Smith PetscFunctionBegin; 24626f33a894SBarry Smith if (!Y->preallocated) { 24637d68702bSBarry Smith ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr); 24646f33a894SBarry Smith } else if (!aij->nz) { 2465b83222d8SBarry Smith PetscInt nonew = aij->nonew; 24666f33a894SBarry Smith ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2467b83222d8SBarry Smith aij->nonew = nonew; 24687d68702bSBarry Smith } 24697d68702bSBarry Smith ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 24707d68702bSBarry Smith PetscFunctionReturn(0); 24717d68702bSBarry Smith } 24728c7482ecSBarry Smith 24733b49f96aSBarry Smith PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool *missing,PetscInt *d) 24743b49f96aSBarry Smith { 24753b49f96aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 24763b49f96aSBarry 
Smith PetscErrorCode ierr; 24773b49f96aSBarry Smith 24783b49f96aSBarry Smith PetscFunctionBegin; 24799ace16cdSJacob Faibussowitsch PetscAssertFalse(A->rmap->n != A->cmap->n,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 24803b49f96aSBarry Smith ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 24813b49f96aSBarry Smith if (d) { 24823b49f96aSBarry Smith PetscInt rstart; 24833b49f96aSBarry Smith ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 24843b49f96aSBarry Smith *d += rstart/A->rmap->bs; 24853b49f96aSBarry Smith 24863b49f96aSBarry Smith } 24873b49f96aSBarry Smith PetscFunctionReturn(0); 24883b49f96aSBarry Smith } 24893b49f96aSBarry Smith 2490a5b7ff6bSBarry Smith PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 2491a5b7ff6bSBarry Smith { 2492a5b7ff6bSBarry Smith PetscFunctionBegin; 2493a5b7ff6bSBarry Smith *a = ((Mat_MPIBAIJ*)A->data)->A; 2494a5b7ff6bSBarry Smith PetscFunctionReturn(0); 2495a5b7ff6bSBarry Smith } 2496a5b7ff6bSBarry Smith 249779bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 24983964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2499cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 2500cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 2501cc2dc46cSBarry Smith MatMult_MPIBAIJ, 250297304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 25037c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 25047c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 2505f4259b30SLisandro Dalcin NULL, 2506f4259b30SLisandro Dalcin NULL, 2507f4259b30SLisandro Dalcin NULL, 2508f4259b30SLisandro Dalcin /*10*/ NULL, 2509f4259b30SLisandro Dalcin NULL, 2510f4259b30SLisandro Dalcin NULL, 2511b1a666ecSBarry Smith MatSOR_MPIBAIJ, 2512cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 251397304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 25147fc3c18eSBarry Smith MatEqual_MPIBAIJ, 2515cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 2516cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 2517cc2dc46cSBarry 
Smith MatNorm_MPIBAIJ, 251897304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 2519cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2520cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2521cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 2522d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIBAIJ, 2523f4259b30SLisandro Dalcin NULL, 2524f4259b30SLisandro Dalcin NULL, 2525f4259b30SLisandro Dalcin NULL, 2526f4259b30SLisandro Dalcin NULL, 25274994cf47SJed Brown /*29*/ MatSetUp_MPIBAIJ, 2528f4259b30SLisandro Dalcin NULL, 2529f4259b30SLisandro Dalcin NULL, 2530a5b7ff6bSBarry Smith MatGetDiagonalBlock_MPIBAIJ, 2531f4259b30SLisandro Dalcin NULL, 2532d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIBAIJ, 2533f4259b30SLisandro Dalcin NULL, 2534f4259b30SLisandro Dalcin NULL, 2535f4259b30SLisandro Dalcin NULL, 2536f4259b30SLisandro Dalcin NULL, 2537d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIBAIJ, 25387dae84e0SHong Zhang MatCreateSubMatrices_MPIBAIJ, 2539cc2dc46cSBarry Smith MatIncreaseOverlap_MPIBAIJ, 2540cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 25413c896bc6SHong Zhang MatCopy_MPIBAIJ, 2542f4259b30SLisandro Dalcin /*44*/ NULL, 2543cc2dc46cSBarry Smith MatScale_MPIBAIJ, 25447d68702bSBarry Smith MatShift_MPIBAIJ, 2545f4259b30SLisandro Dalcin NULL, 25466f0a72daSMatthew G. 
Knepley MatZeroRowsColumns_MPIBAIJ, 2547f4259b30SLisandro Dalcin /*49*/ NULL, 2548f4259b30SLisandro Dalcin NULL, 2549f4259b30SLisandro Dalcin NULL, 2550f4259b30SLisandro Dalcin NULL, 2551f4259b30SLisandro Dalcin NULL, 255293dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2553f4259b30SLisandro Dalcin NULL, 2554cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 255582094794SBarry Smith MatPermute_MPIBAIJ, 2556cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 25577dae84e0SHong Zhang /*59*/ MatCreateSubMatrix_MPIBAIJ, 2558f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2559f14a1c24SBarry Smith MatView_MPIBAIJ, 2560f4259b30SLisandro Dalcin NULL, 2561f4259b30SLisandro Dalcin NULL, 2562f4259b30SLisandro Dalcin /*64*/ NULL, 2563f4259b30SLisandro Dalcin NULL, 2564f4259b30SLisandro Dalcin NULL, 2565f4259b30SLisandro Dalcin NULL, 2566f4259b30SLisandro Dalcin NULL, 2567d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2568f4259b30SLisandro Dalcin NULL, 2569f4259b30SLisandro Dalcin NULL, 2570f4259b30SLisandro Dalcin NULL, 2571f4259b30SLisandro Dalcin NULL, 2572f4259b30SLisandro Dalcin /*74*/ NULL, 2573f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 2574f4259b30SLisandro Dalcin NULL, 2575f4259b30SLisandro Dalcin NULL, 2576f4259b30SLisandro Dalcin NULL, 2577f4259b30SLisandro Dalcin /*79*/ NULL, 2578f4259b30SLisandro Dalcin NULL, 2579f4259b30SLisandro Dalcin NULL, 2580f4259b30SLisandro Dalcin NULL, 25815bba2384SShri Abhyankar MatLoad_MPIBAIJ, 2582f4259b30SLisandro Dalcin /*84*/ NULL, 2583f4259b30SLisandro Dalcin NULL, 2584f4259b30SLisandro Dalcin NULL, 2585f4259b30SLisandro Dalcin NULL, 2586f4259b30SLisandro Dalcin NULL, 2587f4259b30SLisandro Dalcin /*89*/ NULL, 2588f4259b30SLisandro Dalcin NULL, 2589f4259b30SLisandro Dalcin NULL, 2590f4259b30SLisandro Dalcin NULL, 2591f4259b30SLisandro Dalcin NULL, 2592f4259b30SLisandro Dalcin /*94*/ NULL, 2593f4259b30SLisandro Dalcin NULL, 2594f4259b30SLisandro Dalcin NULL, 2595f4259b30SLisandro Dalcin NULL, 2596f4259b30SLisandro Dalcin NULL, 
2597f4259b30SLisandro Dalcin /*99*/ NULL, 2598f4259b30SLisandro Dalcin NULL, 2599f4259b30SLisandro Dalcin NULL, 26002726fb6dSPierre Jolivet MatConjugate_MPIBAIJ, 2601f4259b30SLisandro Dalcin NULL, 2602f4259b30SLisandro Dalcin /*104*/NULL, 260399cafbc1SBarry Smith MatRealPart_MPIBAIJ, 26048c7482ecSBarry Smith MatImaginaryPart_MPIBAIJ, 2605f4259b30SLisandro Dalcin NULL, 2606f4259b30SLisandro Dalcin NULL, 2607f4259b30SLisandro Dalcin /*109*/NULL, 2608f4259b30SLisandro Dalcin NULL, 2609f4259b30SLisandro Dalcin NULL, 2610f4259b30SLisandro Dalcin NULL, 26113b49f96aSBarry Smith MatMissingDiagonal_MPIBAIJ, 2612d1adec66SJed Brown /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 2613f4259b30SLisandro Dalcin NULL, 26144683f7a4SShri Abhyankar MatGetGhosts_MPIBAIJ, 2615f4259b30SLisandro Dalcin NULL, 2616f4259b30SLisandro Dalcin NULL, 2617f4259b30SLisandro Dalcin /*119*/NULL, 2618f4259b30SLisandro Dalcin NULL, 2619f4259b30SLisandro Dalcin NULL, 2620f4259b30SLisandro Dalcin NULL, 2621e8271787SHong Zhang MatGetMultiProcBlock_MPIBAIJ, 2622f4259b30SLisandro Dalcin /*124*/NULL, 2623a873a8cdSSam Reynolds MatGetColumnReductions_MPIBAIJ, 26243964eb88SJed Brown MatInvertBlockDiagonal_MPIBAIJ, 2625f4259b30SLisandro Dalcin NULL, 2626f4259b30SLisandro Dalcin NULL, 2627f4259b30SLisandro Dalcin /*129*/ NULL, 2628f4259b30SLisandro Dalcin NULL, 2629f4259b30SLisandro Dalcin NULL, 2630f4259b30SLisandro Dalcin NULL, 2631f4259b30SLisandro Dalcin NULL, 2632f4259b30SLisandro Dalcin /*134*/ NULL, 2633f4259b30SLisandro Dalcin NULL, 2634f4259b30SLisandro Dalcin NULL, 2635f4259b30SLisandro Dalcin NULL, 2636f4259b30SLisandro Dalcin NULL, 263746533700Sstefano_zampini /*139*/ MatSetBlockSizes_Default, 2638f4259b30SLisandro Dalcin NULL, 2639f4259b30SLisandro Dalcin NULL, 2640bdf6f3fcSHong Zhang MatFDColoringSetUp_MPIXAIJ, 2641f4259b30SLisandro Dalcin NULL, 2642bdf6f3fcSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 26438c7482ecSBarry Smith }; 264479bdfe76SSatish Balay 2645cc2e6a90SBarry Smith 
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 2646c9225affSStefano Zampini PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 2647d94109b8SHong Zhang 2648cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2649aac34f13SBarry Smith { 2650b8d659d7SLisandro Dalcin PetscInt m,rstart,cstart,cend; 2651f4259b30SLisandro Dalcin PetscInt i,j,dlen,olen,nz,nz_max=0,*d_nnz=NULL,*o_nnz=NULL; 2652f4259b30SLisandro Dalcin const PetscInt *JJ =NULL; 2653f4259b30SLisandro Dalcin PetscScalar *values=NULL; 2654d47bf9aaSJed Brown PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2655aac34f13SBarry Smith PetscErrorCode ierr; 26563bd0feecSPierre Jolivet PetscBool nooffprocentries; 2657aac34f13SBarry Smith 2658aac34f13SBarry Smith PetscFunctionBegin; 265926283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 266026283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 266126283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 266226283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2663e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2664d0f46423SBarry Smith m = B->rmap->n/bs; 2665d0f46423SBarry Smith rstart = B->rmap->rstart/bs; 2666d0f46423SBarry Smith cstart = B->cmap->rstart/bs; 2667d0f46423SBarry Smith cend = B->cmap->rend/bs; 2668b8d659d7SLisandro Dalcin 26699ace16cdSJacob Faibussowitsch PetscAssertFalse(ii[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %" PetscInt_FMT,ii[0]); 2670dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2671aac34f13SBarry Smith for (i=0; i<m; i++) { 2672cf12db73SBarry Smith nz = ii[i+1] - ii[i]; 26739ace16cdSJacob Faibussowitsch PetscAssertFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %" PetscInt_FMT " has a 
negative number of columns %" PetscInt_FMT,i,nz); 2674b8d659d7SLisandro Dalcin nz_max = PetscMax(nz_max,nz); 267537cd3c0dSBarry Smith dlen = 0; 267637cd3c0dSBarry Smith olen = 0; 2677cf12db73SBarry Smith JJ = jj + ii[i]; 2678b8d659d7SLisandro Dalcin for (j=0; j<nz; j++) { 267937cd3c0dSBarry Smith if (*JJ < cstart || *JJ >= cend) olen++; 268037cd3c0dSBarry Smith else dlen++; 2681aac34f13SBarry Smith JJ++; 2682aac34f13SBarry Smith } 268337cd3c0dSBarry Smith d_nnz[i] = dlen; 268437cd3c0dSBarry Smith o_nnz[i] = olen; 2685aac34f13SBarry Smith } 2686aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2687fca92195SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2688aac34f13SBarry Smith 2689b8d659d7SLisandro Dalcin values = (PetscScalar*)V; 2690b8d659d7SLisandro Dalcin if (!values) { 269137cd3c0dSBarry Smith ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2692b8d659d7SLisandro Dalcin } 2693b8d659d7SLisandro Dalcin for (i=0; i<m; i++) { 2694b8d659d7SLisandro Dalcin PetscInt row = i + rstart; 2695cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2696cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 2697bb80cfbbSStefano Zampini if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2698cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2699b8d659d7SLisandro Dalcin ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 27003adadaf3SJed Brown } else { /* block ordering does not match so we can only insert one block at a time. */ 27013adadaf3SJed Brown PetscInt j; 27023adadaf3SJed Brown for (j=0; j<ncols; j++) { 27033adadaf3SJed Brown const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 27043adadaf3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 27053adadaf3SJed Brown } 27063adadaf3SJed Brown } 2707aac34f13SBarry Smith } 2708aac34f13SBarry Smith 2709b8d659d7SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 27103bd0feecSPierre Jolivet nooffprocentries = B->nooffprocentries; 27113bd0feecSPierre Jolivet B->nooffprocentries = PETSC_TRUE; 2712aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27143bd0feecSPierre Jolivet B->nooffprocentries = nooffprocentries; 27153bd0feecSPierre Jolivet 27167827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2717aac34f13SBarry Smith PetscFunctionReturn(0); 2718aac34f13SBarry Smith } 2719aac34f13SBarry Smith 2720aac34f13SBarry Smith /*@C 2721664954b6SBarry Smith MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values 2722aac34f13SBarry Smith 2723d083f849SBarry Smith Collective 2724aac34f13SBarry Smith 2725aac34f13SBarry Smith Input Parameters: 27261c4f3114SJed Brown + B - the matrix 2727dfb205c3SBarry Smith . bs - the block size 2728aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2729aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2730aac34f13SBarry Smith - v - optional values in the matrix 2731aac34f13SBarry Smith 2732664954b6SBarry Smith Level: advanced 2733aac34f13SBarry Smith 273495452b02SPatrick Sanan Notes: 273595452b02SPatrick Sanan The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 27363adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 27373adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 27383adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 27393adadaf3SJed Brown block column and the second index is over columns within a block. 27403adadaf3SJed Brown 2741664954b6SBarry Smith Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2742664954b6SBarry Smith 27433adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2744aac34f13SBarry Smith @*/ 27457087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2746aac34f13SBarry Smith { 27474ac538c5SBarry Smith PetscErrorCode ierr; 2748aac34f13SBarry Smith 2749aac34f13SBarry Smith PetscFunctionBegin; 27506ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 27516ba663aaSJed Brown PetscValidType(B,1); 27526ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 27534ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2754aac34f13SBarry Smith PetscFunctionReturn(0); 2755aac34f13SBarry Smith } 2756aac34f13SBarry Smith 2757b2573a8aSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2758a23d5eceSKris Buschelman { 2759a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2760dfbe8321SBarry Smith PetscErrorCode ierr; 
2761535b19f3SBarry Smith PetscInt i; 27625d2a9ed1SStefano Zampini PetscMPIInt size; 2763a23d5eceSKris Buschelman 2764a23d5eceSKris Buschelman PetscFunctionBegin; 276533d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 276626283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 276726283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2768e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2769899cda47SBarry Smith 2770a23d5eceSKris Buschelman if (d_nnz) { 2771d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 27729ace16cdSJacob Faibussowitsch PetscAssertFalse(d_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,d_nnz[i]); 2773a23d5eceSKris Buschelman } 2774a23d5eceSKris Buschelman } 2775a23d5eceSKris Buschelman if (o_nnz) { 2776d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 27779ace16cdSJacob Faibussowitsch PetscAssertFalse(o_nnz[i] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT,i,o_nnz[i]); 2778a23d5eceSKris Buschelman } 2779a23d5eceSKris Buschelman } 2780a23d5eceSKris Buschelman 2781a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2782a23d5eceSKris Buschelman b->bs2 = bs*bs; 2783d0f46423SBarry Smith b->mbs = B->rmap->n/bs; 2784d0f46423SBarry Smith b->nbs = B->cmap->n/bs; 2785d0f46423SBarry Smith b->Mbs = B->rmap->N/bs; 2786d0f46423SBarry Smith b->Nbs = B->cmap->N/bs; 2787a23d5eceSKris Buschelman 2788a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2789d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2790a23d5eceSKris Buschelman } 2791d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2792d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2793d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2794d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2795a23d5eceSKris Buschelman 
2796cb7b82ddSBarry Smith #if defined(PETSC_USE_CTABLE) 2797cb7b82ddSBarry Smith ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2798cb7b82ddSBarry Smith #else 2799cb7b82ddSBarry Smith ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2800cb7b82ddSBarry Smith #endif 2801cb7b82ddSBarry Smith ierr = PetscFree(b->garray);CHKERRQ(ierr); 2802cb7b82ddSBarry Smith ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2803cb7b82ddSBarry Smith ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2804cb7b82ddSBarry Smith 2805cb7b82ddSBarry Smith /* Because the B will have been resized we simply destroy it and create a new one each time */ 2806ffc4695bSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr); 2807cb7b82ddSBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2808cb7b82ddSBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 28095d2a9ed1SStefano Zampini ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2810cb7b82ddSBarry Smith ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 2811cb7b82ddSBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2812cb7b82ddSBarry Smith 2813526dfc15SBarry Smith if (!B->preallocated) { 2814f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2815d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 28169c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 28173bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2818ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2819526dfc15SBarry Smith } 2820a23d5eceSKris Buschelman 2821526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2822526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 
2823526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2824cb7b82ddSBarry Smith B->was_assembled = PETSC_FALSE; 2825cb7b82ddSBarry Smith B->assembled = PETSC_FALSE; 2826a23d5eceSKris Buschelman PetscFunctionReturn(0); 2827a23d5eceSKris Buschelman } 2828a23d5eceSKris Buschelman 28297087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 28307087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 28315bf65638SKris Buschelman 2832cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 283382094794SBarry Smith { 283482094794SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 283582094794SBarry Smith PetscErrorCode ierr; 283682094794SBarry Smith Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 283782094794SBarry Smith PetscInt M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 283882094794SBarry Smith const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 283982094794SBarry Smith 284082094794SBarry Smith PetscFunctionBegin; 2841854ce69bSBarry Smith ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 284282094794SBarry Smith ii[0] = 0; 284382094794SBarry Smith for (i=0; i<M; i++) { 28449ace16cdSJacob Faibussowitsch PetscAssertFalse((id[i+1] - id[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,id[i],id[i+1]); 28459ace16cdSJacob Faibussowitsch PetscAssertFalse((io[i+1] - io[i]) < 0,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT,i,io[i],io[i+1]); 284682094794SBarry Smith ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 28475ee9ba1cSJed Brown /* remove one from count of matrix has diagonal */ 28485ee9ba1cSJed Brown for (j=id[i]; j<id[i+1]; j++) { 28495ee9ba1cSJed Brown if (jd[j] == i) {ii[i+1]--;break;} 28505ee9ba1cSJed Brown } 285182094794SBarry Smith } 
2852785e854fSJed Brown ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 285382094794SBarry Smith cnt = 0; 285482094794SBarry Smith for (i=0; i<M; i++) { 285582094794SBarry Smith for (j=io[i]; j<io[i+1]; j++) { 285682094794SBarry Smith if (garray[jo[j]] > rstart) break; 285782094794SBarry Smith jj[cnt++] = garray[jo[j]]; 285882094794SBarry Smith } 285982094794SBarry Smith for (k=id[i]; k<id[i+1]; k++) { 28605ee9ba1cSJed Brown if (jd[k] != i) { 286182094794SBarry Smith jj[cnt++] = rstart + jd[k]; 286282094794SBarry Smith } 28635ee9ba1cSJed Brown } 286482094794SBarry Smith for (; j<io[i+1]; j++) { 286582094794SBarry Smith jj[cnt++] = garray[jo[j]]; 286682094794SBarry Smith } 286782094794SBarry Smith } 2868ce94432eSBarry Smith ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 286982094794SBarry Smith PetscFunctionReturn(0); 287082094794SBarry Smith } 287182094794SBarry Smith 2872c6db04a5SJed Brown #include <../src/mat/impls/aij/mpi/mpiaij.h> 287362471d69SBarry Smith 2874cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2875b2573a8aSBarry Smith 2876cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 287762471d69SBarry Smith { 287862471d69SBarry Smith PetscErrorCode ierr; 287962471d69SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 288062471d69SBarry Smith Mat B; 288185a69837SSatish Balay Mat_MPIAIJ *b; 288262471d69SBarry Smith 288362471d69SBarry Smith PetscFunctionBegin; 28849ace16cdSJacob Faibussowitsch PetscAssertFalse(!A->assembled,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 288562471d69SBarry Smith 28860f6d62edSLisandro Dalcin if (reuse == MAT_REUSE_MATRIX) { 28870f6d62edSLisandro Dalcin B = *newmat; 28880f6d62edSLisandro Dalcin } else { 2889ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 28906d0a4a0eSHong 
Zhang ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2891f090d951SRémi Lacroix ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2892f090d951SRémi Lacroix ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 28930298fd71SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 28940298fd71SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 28950f6d62edSLisandro Dalcin } 289662471d69SBarry Smith b = (Mat_MPIAIJ*) B->data; 289762471d69SBarry Smith 28980f6d62edSLisandro Dalcin if (reuse == MAT_REUSE_MATRIX) { 28990f6d62edSLisandro Dalcin ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr); 29000f6d62edSLisandro Dalcin ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr); 29010f6d62edSLisandro Dalcin } else { 29026bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 29036bf464f9SBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2904ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 290562471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 290662471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 29076a719282SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29086a719282SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29090f6d62edSLisandro Dalcin } 29100f6d62edSLisandro Dalcin ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29110f6d62edSLisandro Dalcin ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29120f6d62edSLisandro Dalcin 2913511c6705SHong Zhang if (reuse == MAT_INPLACE_MATRIX) { 291428be2f97SBarry Smith ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr); 291562471d69SBarry Smith } else { 291662471d69SBarry Smith *newmat = B; 291762471d69SBarry Smith } 291862471d69SBarry Smith PetscFunctionReturn(0); 
291962471d69SBarry Smith } 292062471d69SBarry Smith 29210bad9183SKris Buschelman /*MC 2922fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 29230bad9183SKris Buschelman 29240bad9183SKris Buschelman Options Database Keys: 29258c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 29268c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 29276679dcc1SBarry Smith . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 29288c07d4e3SBarry Smith - -mat_use_hash_table <fact> 29290bad9183SKris Buschelman 29300bad9183SKris Buschelman Level: beginner 29310cd7f59aSBarry Smith 29320cd7f59aSBarry Smith Notes: 29330cd7f59aSBarry Smith MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 29340cd7f59aSBarry Smith space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 29350bad9183SKris Buschelman 2936fd292e60Sprj- .seealso: MatCreateBAIJ 29370bad9183SKris Buschelman M*/ 29380bad9183SKris Buschelman 2939cc2e6a90SBarry Smith PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2940c0cdd4a1SDahai Guo 29418cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2942273d9f13SBarry Smith { 2943273d9f13SBarry Smith Mat_MPIBAIJ *b; 2944dfbe8321SBarry Smith PetscErrorCode ierr; 294594ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 2946273d9f13SBarry Smith 2947273d9f13SBarry Smith PetscFunctionBegin; 2948b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 294982502324SSatish Balay B->data = (void*)b; 295082502324SSatish Balay 2951273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 2952273d9f13SBarry Smith B->assembled = PETSC_FALSE; 2953273d9f13SBarry Smith 2954273d9f13SBarry 
Smith B->insertmode = NOT_SET_VALUES; 295555b25c41SPierre Jolivet ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr); 295655b25c41SPierre Jolivet ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRMPI(ierr); 2957273d9f13SBarry Smith 2958273d9f13SBarry Smith /* build local table of row and column ownerships */ 2959854ce69bSBarry Smith ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 2960273d9f13SBarry Smith 2961273d9f13SBarry Smith /* build cache for off array entries formed */ 2962ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 296326fbe8dcSKarl Rupp 2964273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 29650298fd71SBarry Smith b->colmap = NULL; 29660298fd71SBarry Smith b->garray = NULL; 2967273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 2968273d9f13SBarry Smith 2969273d9f13SBarry Smith /* stuff used in block assembly */ 2970f4259b30SLisandro Dalcin b->barray = NULL; 2971273d9f13SBarry Smith 2972273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 2973f4259b30SLisandro Dalcin b->lvec = NULL; 2974f4259b30SLisandro Dalcin b->Mvctx = NULL; 2975273d9f13SBarry Smith 2976273d9f13SBarry Smith /* stuff for MatGetRow() */ 2977f4259b30SLisandro Dalcin b->rowindices = NULL; 2978f4259b30SLisandro Dalcin b->rowvalues = NULL; 2979273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 2980273d9f13SBarry Smith 2981273d9f13SBarry Smith /* hash table stuff */ 2982f4259b30SLisandro Dalcin b->ht = NULL; 2983f4259b30SLisandro Dalcin b->hd = NULL; 2984273d9f13SBarry Smith b->ht_size = 0; 2985273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 2986273d9f13SBarry Smith b->ht_fact = 0; 2987273d9f13SBarry Smith b->ht_total_ct = 0; 2988273d9f13SBarry Smith b->ht_insert_ct = 0; 2989273d9f13SBarry Smith 29907dae84e0SHong Zhang /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 29917a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 29927a868f3eSHong Zhang 2993bdf89e91SBarry 
Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 2994bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 2995bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 29967ea3e4caSstefano_zampini #if defined(PETSC_HAVE_HYPRE) 29977ea3e4caSstefano_zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 29987ea3e4caSstefano_zampini #endif 2999bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3000bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3001bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3002bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3003bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3004bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3005c9225affSStefano Zampini ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 300617667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 300794ae4db5SBarry Smith 300894ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 3009abf3b562SBarry Smith ierr = 
PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr); 301094ae4db5SBarry Smith if (flg) { 301194ae4db5SBarry Smith PetscReal fact = 1.39; 301294ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 301394ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 301494ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 301594ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 30167d3de750SJacob Faibussowitsch ierr = PetscInfo(B,"Hash table Factor used %5.2g\n",(double)fact);CHKERRQ(ierr); 301794ae4db5SBarry Smith } 301894ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 3019273d9f13SBarry Smith PetscFunctionReturn(0); 3020273d9f13SBarry Smith } 3021273d9f13SBarry Smith 3022209238afSKris Buschelman /*MC 3023002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3024209238afSKris Buschelman 3025209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3026209238afSKris Buschelman and MATMPIBAIJ otherwise. 3027209238afSKris Buschelman 3028209238afSKris Buschelman Options Database Keys: 3029209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3030209238afSKris Buschelman 3031209238afSKris Buschelman Level: beginner 3032209238afSKris Buschelman 303369b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3034209238afSKris Buschelman M*/ 3035209238afSKris Buschelman 3036273d9f13SBarry Smith /*@C 3037aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3038273d9f13SBarry Smith (block compressed row). 
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix; <fact> is the hash table factor

   Notes:
   If PETSC_DECIDE  or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
3111aa95bbe8SBarry Smith 3112273d9f13SBarry Smith Level: intermediate 3113273d9f13SBarry Smith 3114ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3115273d9f13SBarry Smith @*/ 31167087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3117273d9f13SBarry Smith { 31184ac538c5SBarry Smith PetscErrorCode ierr; 3119273d9f13SBarry Smith 3120273d9f13SBarry Smith PetscFunctionBegin; 31216ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 31226ba663aaSJed Brown PetscValidType(B,1); 31236ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 31244ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3125273d9f13SBarry Smith PetscFunctionReturn(0); 3126273d9f13SBarry Smith } 3127273d9f13SBarry Smith 312879bdfe76SSatish Balay /*@C 312969b1f4b7SBarry Smith MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 313079bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 313179bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 313279bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 313379bdfe76SSatish Balay performance can be increased by more than a factor of 50. 313479bdfe76SSatish Balay 3135d083f849SBarry Smith Collective 3136db81eaa0SLois Curfman McInnes 313779bdfe76SSatish Balay Input Parameters: 3138db81eaa0SLois Curfman McInnes + comm - MPI communicator 3139bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3140bb7ae925SBarry Smith blocksize always defines the size of the blocks. 
          The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table to save time in constructing the matrix; <fact> is the hash table factor

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE  or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
321879bdfe76SSatish Balay 3219027ccd11SLois Curfman McInnes Level: intermediate 3220027ccd11SLois Curfman McInnes 322169b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 322279bdfe76SSatish Balay @*/ 322369b1f4b7SBarry Smith PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 322479bdfe76SSatish Balay { 32256849ba73SBarry Smith PetscErrorCode ierr; 3226b24ad042SBarry Smith PetscMPIInt size; 322779bdfe76SSatish Balay 3228d64ed03dSBarry Smith PetscFunctionBegin; 3229f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3230f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3231ffc4695bSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr); 3232273d9f13SBarry Smith if (size > 1) { 3233273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3234273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3235273d9f13SBarry Smith } else { 3236273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3237273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 32383914022bSBarry Smith } 32393a40ed3dSBarry Smith PetscFunctionReturn(0); 324079bdfe76SSatish Balay } 3241026e39d0SSatish Balay 32426849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 32430ac07820SSatish Balay { 32440ac07820SSatish Balay Mat mat; 32450ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3246dfbe8321SBarry Smith PetscErrorCode ierr; 3247b24ad042SBarry Smith PetscInt len=0; 32480ac07820SSatish Balay 3249d64ed03dSBarry Smith PetscFunctionBegin; 3250f4259b30SLisandro Dalcin *newmat = NULL; 3251ce94432eSBarry Smith ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3252d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 32537adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 32547fff6886SHong Zhang 3255d5f3da31SBarry Smith mat->factortype = matin->factortype; 3256273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 32570ac07820SSatish Balay mat->assembled = PETSC_TRUE; 32587fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 32597fff6886SHong Zhang 3260273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 3261d0f46423SBarry Smith mat->rmap->bs = matin->rmap->bs; 32620ac07820SSatish Balay a->bs2 = oldmat->bs2; 32630ac07820SSatish Balay a->mbs = oldmat->mbs; 32640ac07820SSatish Balay a->nbs = oldmat->nbs; 32650ac07820SSatish Balay a->Mbs = oldmat->Mbs; 32660ac07820SSatish Balay a->Nbs = oldmat->Nbs; 32670ac07820SSatish Balay 32681e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 32691e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3270899cda47SBarry Smith 32710ac07820SSatish Balay a->size = oldmat->size; 32720ac07820SSatish Balay a->rank = oldmat->rank; 3273aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 3274aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 3275f4259b30SLisandro Dalcin a->rowindices = NULL; 3276f4259b30SLisandro Dalcin a->rowvalues = NULL; 32770ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 3278f4259b30SLisandro Dalcin a->barray = NULL; 3279899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 3280899cda47SBarry Smith a->rendbs = oldmat->rendbs; 3281899cda47SBarry Smith a->cstartbs = oldmat->cstartbs; 3282899cda47SBarry Smith a->cendbs = oldmat->cendbs; 32830ac07820SSatish Balay 3284133cdb44SSatish Balay /* hash table stuff */ 3285f4259b30SLisandro Dalcin a->ht = NULL; 3286f4259b30SLisandro Dalcin a->hd = NULL; 3287133cdb44SSatish Balay 
a->ht_size = 0; 3288133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 328925fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 3290133cdb44SSatish Balay a->ht_total_ct = 0; 3291133cdb44SSatish Balay a->ht_insert_ct = 0; 3292133cdb44SSatish Balay 3293580bdb30SBarry Smith ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr); 32940ac07820SSatish Balay if (oldmat->colmap) { 3295aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 32960f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 329748e59246SSatish Balay #else 3298854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 32993bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3300580bdb30SBarry Smith ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr); 330148e59246SSatish Balay #endif 3302f4259b30SLisandro Dalcin } else a->colmap = NULL; 33034beb1cfeSHong Zhang 33040ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3305785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 33063bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3307580bdb30SBarry Smith ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); 3308f4259b30SLisandro Dalcin } else a->garray = NULL; 33090ac07820SSatish Balay 3310ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 33110ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 33123bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 33130ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 33143bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 33157fff6886SHong Zhang 33162e8a6d31SBarry Smith ierr 
= MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 33173bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 33182e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 33193bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3320140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 33210ac07820SSatish Balay *newmat = mat; 33223a40ed3dSBarry Smith PetscFunctionReturn(0); 33230ac07820SSatish Balay } 332457b952d6SSatish Balay 3325618cc2edSLisandro Dalcin /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3326b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 3327b51a4376SLisandro Dalcin { 3328b51a4376SLisandro Dalcin PetscInt header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k; 3329b51a4376SLisandro Dalcin PetscInt *rowidxs,*colidxs,rs,cs,ce; 3330b51a4376SLisandro Dalcin PetscScalar *matvals; 3331b51a4376SLisandro Dalcin PetscErrorCode ierr; 3332b51a4376SLisandro Dalcin 3333b51a4376SLisandro Dalcin PetscFunctionBegin; 3334b51a4376SLisandro Dalcin ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3335b51a4376SLisandro Dalcin 3336b51a4376SLisandro Dalcin /* read in matrix header */ 3337b51a4376SLisandro Dalcin ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 33389ace16cdSJacob Faibussowitsch PetscAssertFalse(header[0] != MAT_FILE_CLASSID,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 3339b51a4376SLisandro Dalcin M = header[1]; N = header[2]; nz = header[3]; 33409ace16cdSJacob Faibussowitsch PetscAssertFalse(M < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%" PetscInt_FMT ") in file is negative",M); 33419ace16cdSJacob Faibussowitsch PetscAssertFalse(N < 0,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column 
size (%" PetscInt_FMT ") in file is negative",N); 33429ace16cdSJacob Faibussowitsch PetscAssertFalse(nz < 0,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3343b51a4376SLisandro Dalcin 3344b51a4376SLisandro Dalcin /* set block sizes from the viewer's .info file */ 3345b51a4376SLisandro Dalcin ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 3346618cc2edSLisandro Dalcin /* set local sizes if not set already */ 3347618cc2edSLisandro Dalcin if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3348618cc2edSLisandro Dalcin if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3349b51a4376SLisandro Dalcin /* set global sizes if not set already */ 3350b51a4376SLisandro Dalcin if (mat->rmap->N < 0) mat->rmap->N = M; 3351b51a4376SLisandro Dalcin if (mat->cmap->N < 0) mat->cmap->N = N; 3352b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 3353b51a4376SLisandro Dalcin ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 3354b51a4376SLisandro Dalcin 3355b51a4376SLisandro Dalcin /* check if the matrix sizes are correct */ 3356b51a4376SLisandro Dalcin ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 33579ace16cdSJacob Faibussowitsch PetscAssertFalse(M != rows || N != cols,PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")",M,N,rows,cols); 3358b51a4376SLisandro Dalcin ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 3359b51a4376SLisandro Dalcin ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr); 33601e1ea65dSPierre Jolivet ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);CHKERRQ(ierr); 33611e1ea65dSPierre Jolivet ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);CHKERRQ(ierr); 3362b51a4376SLisandro Dalcin mbs = m/bs; nbs = n/bs; 3363b51a4376SLisandro Dalcin 3364b51a4376SLisandro Dalcin /* read in row lengths and build row indices */ 3365b51a4376SLisandro 
Dalcin ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 3366b51a4376SLisandro Dalcin ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 3367b51a4376SLisandro Dalcin rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 3368820f2d46SBarry Smith ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr); 33699ace16cdSJacob Faibussowitsch PetscAssertFalse(sum != nz,PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT,nz,sum); 3370b51a4376SLisandro Dalcin 3371b51a4376SLisandro Dalcin /* read in column indices and matrix values */ 3372b51a4376SLisandro Dalcin ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 3373b51a4376SLisandro Dalcin ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 3374b51a4376SLisandro Dalcin ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 3375b51a4376SLisandro Dalcin 3376b51a4376SLisandro Dalcin { /* preallocate matrix storage */ 3377b51a4376SLisandro Dalcin PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3378b51a4376SLisandro Dalcin PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3379618cc2edSLisandro Dalcin PetscBool sbaij,done; 3380b51a4376SLisandro Dalcin PetscInt *d_nnz,*o_nnz; 3381b51a4376SLisandro Dalcin 3382b51a4376SLisandro Dalcin ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr); 3383b51a4376SLisandro Dalcin ierr = PetscHSetICreate(&ht);CHKERRQ(ierr); 3384b51a4376SLisandro Dalcin ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr); 3385618cc2edSLisandro Dalcin ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr); 3386b51a4376SLisandro Dalcin for (i=0; i<mbs; i++) { 3387b51a4376SLisandro Dalcin ierr = 
PetscBTMemzero(nbs,bt);CHKERRQ(ierr); 3388b51a4376SLisandro Dalcin ierr = PetscHSetIClear(ht);CHKERRQ(ierr); 3389618cc2edSLisandro Dalcin for (k=0; k<bs; k++) { 3390618cc2edSLisandro Dalcin PetscInt row = bs*i + k; 3391618cc2edSLisandro Dalcin for (j=rowidxs[row]; j<rowidxs[row+1]; j++) { 3392618cc2edSLisandro Dalcin PetscInt col = colidxs[j]; 3393618cc2edSLisandro Dalcin if (!sbaij || col >= row) { 3394618cc2edSLisandro Dalcin if (col >= cs && col < ce) { 3395618cc2edSLisandro Dalcin if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++; 3396b51a4376SLisandro Dalcin } else { 3397618cc2edSLisandro Dalcin ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr); 3398b51a4376SLisandro Dalcin if (done) o_nnz[i]++; 3399b51a4376SLisandro Dalcin } 3400b51a4376SLisandro Dalcin } 3401618cc2edSLisandro Dalcin } 3402618cc2edSLisandro Dalcin } 3403618cc2edSLisandro Dalcin } 3404b51a4376SLisandro Dalcin ierr = PetscBTDestroy(&bt);CHKERRQ(ierr); 3405b51a4376SLisandro Dalcin ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr); 3406b51a4376SLisandro Dalcin ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3407618cc2edSLisandro Dalcin ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3408b51a4376SLisandro Dalcin ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3409b51a4376SLisandro Dalcin } 3410b51a4376SLisandro Dalcin 3411b51a4376SLisandro Dalcin /* store matrix values */ 3412b51a4376SLisandro Dalcin for (i=0; i<m; i++) { 3413b51a4376SLisandro Dalcin PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1]; 3414618cc2edSLisandro Dalcin ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr); 3415b51a4376SLisandro Dalcin } 3416b51a4376SLisandro Dalcin 3417b51a4376SLisandro Dalcin ierr = PetscFree(rowidxs);CHKERRQ(ierr); 3418b51a4376SLisandro Dalcin ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 3419b51a4376SLisandro Dalcin ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3420b51a4376SLisandro 
Dalcin ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3421b51a4376SLisandro Dalcin PetscFunctionReturn(0); 3422b51a4376SLisandro Dalcin } 3423b51a4376SLisandro Dalcin 3424b51a4376SLisandro Dalcin PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer) 34254683f7a4SShri Abhyankar { 34264683f7a4SShri Abhyankar PetscErrorCode ierr; 34277f489da9SVaclav Hapla PetscBool isbinary; 34284683f7a4SShri Abhyankar 34294683f7a4SShri Abhyankar PetscFunctionBegin; 34307f489da9SVaclav Hapla ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 34319ace16cdSJacob Faibussowitsch PetscAssertFalse(!isbinary,PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name); 3432b51a4376SLisandro Dalcin ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 34334683f7a4SShri Abhyankar PetscFunctionReturn(0); 34344683f7a4SShri Abhyankar } 34354683f7a4SShri Abhyankar 3436133cdb44SSatish Balay /*@ 3437133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 
3438133cdb44SSatish Balay 3439133cdb44SSatish Balay Input Parameters: 3440a2b725a8SWilliam Gropp + mat - the matrix 3441a2b725a8SWilliam Gropp - fact - factor 3442133cdb44SSatish Balay 3443c5eb9154SBarry Smith Not Collective, each process can use a different factor 3444fee21e36SBarry Smith 34458c890885SBarry Smith Level: advanced 34468c890885SBarry Smith 3447133cdb44SSatish Balay Notes: 34488c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 3449133cdb44SSatish Balay 3450133cdb44SSatish Balay .seealso: MatSetOption() 3451133cdb44SSatish Balay @*/ 34527087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3453133cdb44SSatish Balay { 34544ac538c5SBarry Smith PetscErrorCode ierr; 34555bf65638SKris Buschelman 34565bf65638SKris Buschelman PetscFunctionBegin; 34574ac538c5SBarry Smith ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 34585bf65638SKris Buschelman PetscFunctionReturn(0); 34595bf65638SKris Buschelman } 34605bf65638SKris Buschelman 34617087cfbeSBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 34625bf65638SKris Buschelman { 346325fdafccSSatish Balay Mat_MPIBAIJ *baij; 3464133cdb44SSatish Balay 3465133cdb44SSatish Balay PetscFunctionBegin; 3466133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 3467133cdb44SSatish Balay baij->ht_fact = fact; 3468133cdb44SSatish Balay PetscFunctionReturn(0); 3469133cdb44SSatish Balay } 3470f2a5309cSSatish Balay 34719230625dSJed Brown PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3472f2a5309cSSatish Balay { 3473f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 3474ab4d48faSStefano Zampini PetscBool flg; 3475ab4d48faSStefano Zampini PetscErrorCode ierr; 34765fd66863SKarl Rupp 3477f2a5309cSSatish Balay PetscFunctionBegin; 3478ab4d48faSStefano Zampini ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr); 
34799ace16cdSJacob Faibussowitsch PetscAssertFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input"); 348021e72a00SBarry Smith if (Ad) *Ad = a->A; 348121e72a00SBarry Smith if (Ao) *Ao = a->B; 348221e72a00SBarry Smith if (colmap) *colmap = a->garray; 3483f2a5309cSSatish Balay PetscFunctionReturn(0); 3484f2a5309cSSatish Balay } 348585535b8eSBarry Smith 348685535b8eSBarry Smith /* 348785535b8eSBarry Smith Special version for direct calls from Fortran (to eliminate two function call overheads 348885535b8eSBarry Smith */ 348985535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 349085535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 349185535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 349285535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 349385535b8eSBarry Smith #endif 349485535b8eSBarry Smith 349585535b8eSBarry Smith /*@C 349685535b8eSBarry Smith MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 349785535b8eSBarry Smith 349885535b8eSBarry Smith Collective on Mat 349985535b8eSBarry Smith 350085535b8eSBarry Smith Input Parameters: 350185535b8eSBarry Smith + mat - the matrix 350285535b8eSBarry Smith . min - number of input rows 350385535b8eSBarry Smith . im - input rows 350485535b8eSBarry Smith . nin - number of input columns 350585535b8eSBarry Smith . in - input columns 350685535b8eSBarry Smith . v - numerical values input 350785535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES 350885535b8eSBarry Smith 350995452b02SPatrick Sanan Notes: 351095452b02SPatrick Sanan This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
351185535b8eSBarry Smith 351285535b8eSBarry Smith Level: advanced 351385535b8eSBarry Smith 351485535b8eSBarry Smith .seealso: MatSetValuesBlocked() 351585535b8eSBarry Smith @*/ 351685535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 351785535b8eSBarry Smith { 351885535b8eSBarry Smith /* convert input arguments to C version */ 351985535b8eSBarry Smith Mat mat = *matin; 352085535b8eSBarry Smith PetscInt m = *min, n = *nin; 352185535b8eSBarry Smith InsertMode addv = *addvin; 352285535b8eSBarry Smith 352385535b8eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 352485535b8eSBarry Smith const MatScalar *value; 352585535b8eSBarry Smith MatScalar *barray = baij->barray; 3526ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 352785535b8eSBarry Smith PetscErrorCode ierr; 352885535b8eSBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 352985535b8eSBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3530d0f46423SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 353185535b8eSBarry Smith 353285535b8eSBarry Smith PetscFunctionBegin; 353385535b8eSBarry Smith /* tasks normally handled by MatSetValuesBlocked() */ 353426fbe8dcSKarl Rupp if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 35359ace16cdSJacob Faibussowitsch else PetscAssertFalse(mat->insertmode != addv,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 35369ace16cdSJacob Faibussowitsch PetscAssertFalse(mat->factortype,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 353785535b8eSBarry Smith if (mat->assembled) { 353885535b8eSBarry Smith mat->was_assembled = PETSC_TRUE; 353985535b8eSBarry Smith mat->assembled = PETSC_FALSE; 354085535b8eSBarry Smith } 354185535b8eSBarry Smith ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 354285535b8eSBarry 
Smith 354385535b8eSBarry Smith if (!barray) { 3544785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 354585535b8eSBarry Smith baij->barray = barray; 354685535b8eSBarry Smith } 354785535b8eSBarry Smith 354826fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 354926fbe8dcSKarl Rupp else stepval = (m-1)*bs; 355026fbe8dcSKarl Rupp 355185535b8eSBarry Smith for (i=0; i<m; i++) { 355285535b8eSBarry Smith if (im[i] < 0) continue; 35539ace16cdSJacob Faibussowitsch PetscAssertFalseDebug(im[i] >= baij->Mbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT,im[i],baij->Mbs-1); 355485535b8eSBarry Smith if (im[i] >= rstart && im[i] < rend) { 355585535b8eSBarry Smith row = im[i] - rstart; 355685535b8eSBarry Smith for (j=0; j<n; j++) { 355785535b8eSBarry Smith /* If NumCol = 1 then a copy is not required */ 355885535b8eSBarry Smith if ((roworiented) && (n == 1)) { 355985535b8eSBarry Smith barray = (MatScalar*)v + i*bs2; 356085535b8eSBarry Smith } else if ((!roworiented) && (m == 1)) { 356185535b8eSBarry Smith barray = (MatScalar*)v + j*bs2; 356285535b8eSBarry Smith } else { /* Here a copy is required */ 356385535b8eSBarry Smith if (roworiented) { 356485535b8eSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 356585535b8eSBarry Smith } else { 356685535b8eSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 356785535b8eSBarry Smith } 356885535b8eSBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 356985535b8eSBarry Smith for (jj=0; jj<bs; jj++) { 357085535b8eSBarry Smith *barray++ = *value++; 357185535b8eSBarry Smith } 357285535b8eSBarry Smith } 357385535b8eSBarry Smith barray -=bs2; 357485535b8eSBarry Smith } 357585535b8eSBarry Smith 357685535b8eSBarry Smith if (in[j] >= cstart && in[j] < cend) { 357785535b8eSBarry Smith col = in[j] - cstart; 35788ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 357926fbe8dcSKarl Rupp } else if (in[j] < 0) 
continue; 35809ace16cdSJacob Faibussowitsch else PetscAssertFalseDebug(in[j] >= baij->Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT,in[j],baij->Nbs-1); 358185535b8eSBarry Smith else { 358285535b8eSBarry Smith if (mat->was_assembled) { 358385535b8eSBarry Smith if (!baij->colmap) { 3584ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 358585535b8eSBarry Smith } 358685535b8eSBarry Smith 358785535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 358885535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 358985535b8eSBarry Smith { PetscInt data; 359085535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 35919ace16cdSJacob Faibussowitsch PetscAssertFalse((data - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 359285535b8eSBarry Smith } 359385535b8eSBarry Smith #else 35949ace16cdSJacob Faibussowitsch PetscAssertFalse((baij->colmap[in[j]] - 1) % bs,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 359585535b8eSBarry Smith #endif 359685535b8eSBarry Smith #endif 359785535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 359885535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 359985535b8eSBarry Smith col = (col - 1)/bs; 360085535b8eSBarry Smith #else 360185535b8eSBarry Smith col = (baij->colmap[in[j]] - 1)/bs; 360285535b8eSBarry Smith #endif 360385535b8eSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3604ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 360585535b8eSBarry Smith col = in[j]; 360685535b8eSBarry Smith } 360726fbe8dcSKarl Rupp } else col = in[j]; 36088ab52850SBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr); 360985535b8eSBarry Smith } 361085535b8eSBarry Smith } 361185535b8eSBarry Smith } else { 361285535b8eSBarry Smith if (!baij->donotstash) { 361385535b8eSBarry Smith if (roworiented) { 361485535b8eSBarry 
Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 361585535b8eSBarry Smith } else { 361685535b8eSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 361785535b8eSBarry Smith } 361885535b8eSBarry Smith } 361985535b8eSBarry Smith } 362085535b8eSBarry Smith } 362185535b8eSBarry Smith 362285535b8eSBarry Smith /* task normally handled by MatSetValuesBlocked() */ 362385535b8eSBarry Smith ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 362485535b8eSBarry Smith PetscFunctionReturn(0); 362585535b8eSBarry Smith } 3626dfb205c3SBarry Smith 3627dfb205c3SBarry Smith /*@ 3628483a2f95SBarry Smith MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block 3629dfb205c3SBarry Smith CSR format the local rows. 3630dfb205c3SBarry Smith 3631d083f849SBarry Smith Collective 3632dfb205c3SBarry Smith 3633dfb205c3SBarry Smith Input Parameters: 3634dfb205c3SBarry Smith + comm - MPI communicator 3635dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 3636dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 3637dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 3638dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3639dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 3640dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3641dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3642483a2f95SBarry Smith . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3643dfb205c3SBarry Smith . 
j - column indices 3644dfb205c3SBarry Smith - a - matrix values 3645dfb205c3SBarry Smith 3646dfb205c3SBarry Smith Output Parameter: 3647dfb205c3SBarry Smith . mat - the matrix 3648dfb205c3SBarry Smith 3649dfb205c3SBarry Smith Level: intermediate 3650dfb205c3SBarry Smith 3651dfb205c3SBarry Smith Notes: 3652dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3653dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 3654dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3655dfb205c3SBarry Smith 36563adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 36573adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 36583adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 36593adadaf3SJed Brown with column-major ordering within blocks. 36603adadaf3SJed Brown 3661dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3662dfb205c3SBarry Smith 3663dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 366469b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3665dfb205c3SBarry Smith @*/ 36667087cfbeSBarry Smith PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3667dfb205c3SBarry Smith { 3668dfb205c3SBarry Smith PetscErrorCode ierr; 3669dfb205c3SBarry Smith 3670dfb205c3SBarry Smith PetscFunctionBegin; 36719ace16cdSJacob Faibussowitsch PetscAssertFalse(i[0],PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 36729ace16cdSJacob Faibussowitsch PetscAssertFalse(m < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3673dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3674dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 36759a43d2d5SJed Brown ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr); 367627f91139SJed Brown ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr); 367727f91139SJed Brown ierr = MatSetUp(*mat);CHKERRQ(ierr); 3678d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3679dfb205c3SBarry Smith ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3680d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3681dfb205c3SBarry Smith PetscFunctionReturn(0); 3682dfb205c3SBarry Smith } 3683e561ad89SHong Zhang 3684bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3685e561ad89SHong Zhang { 3686e561ad89SHong Zhang PetscErrorCode ierr; 3687bd153df0SHong Zhang PetscInt m,N,i,rstart,nnz,Ii,bs,cbs; 3688bd153df0SHong Zhang PetscInt *indx; 3689bd153df0SHong Zhang PetscScalar 
*values; 3690e561ad89SHong Zhang 3691e561ad89SHong Zhang PetscFunctionBegin; 3692e561ad89SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3693bd153df0SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3694bd153df0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 36952c6ba4edSHong Zhang PetscInt *dnz,*onz,mbs,Nbs,nbs; 3696bd153df0SHong Zhang PetscInt *bindx,rmax=a->rmax,j; 369777f764caSHong Zhang PetscMPIInt rank,size; 3698e561ad89SHong Zhang 3699bd153df0SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3700bd153df0SHong Zhang mbs = m/bs; Nbs = N/cbs; 3701bd153df0SHong Zhang if (n == PETSC_DECIDE) { 37021e1ea65dSPierre Jolivet ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);CHKERRQ(ierr); 3703bd153df0SHong Zhang } 3704da91a574SPierre Jolivet nbs = n/cbs; 3705e561ad89SHong Zhang 3706647a6520SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 370777f764caSHong Zhang ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */ 370877f764caSHong Zhang 3709ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr); 3710ffc4695bSBarry Smith ierr = MPI_Comm_rank(comm,&size);CHKERRMPI(ierr); 371177f764caSHong Zhang if (rank == size-1) { 371277f764caSHong Zhang /* Check sum(nbs) = Nbs */ 37139ace16cdSJacob Faibussowitsch PetscAssertFalse(__end != Nbs,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT,__end,Nbs); 371477f764caSHong Zhang } 371577f764caSHong Zhang 371677f764caSHong Zhang rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */ 3717bd153df0SHong Zhang for (i=0; i<mbs; i++) { 3718647a6520SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3719647a6520SHong Zhang nnz = nnz/bs; 3720647a6520SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 
3721647a6520SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3722647a6520SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3723e561ad89SHong Zhang } 3724647a6520SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 3725e561ad89SHong Zhang 3726e561ad89SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 372777f764caSHong Zhang ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3728e561ad89SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 37298761c3d6SHong Zhang ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr); 37308761c3d6SHong Zhang ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr); 3731e561ad89SHong Zhang ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3732e561ad89SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3733f2e2784eSPierre Jolivet ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3734e561ad89SHong Zhang } 3735e561ad89SHong Zhang 3736bd153df0SHong Zhang /* numeric phase */ 3737647a6520SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3738bd153df0SHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3739e561ad89SHong Zhang 3740e561ad89SHong Zhang for (i=0; i<m; i++) { 3741e561ad89SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3742e561ad89SHong Zhang Ii = i + rstart; 3743bd153df0SHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3744e561ad89SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3745e561ad89SHong Zhang } 3746bd153df0SHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3747bd153df0SHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3748e561ad89SHong Zhang PetscFunctionReturn(0); 3749e561ad89SHong Zhang } 3750