179bdfe76SSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3*c5d9258eSSatish Balay 4c6db04a5SJed Brown #include <petscblaslapack.h> 565a92638SMatthew G. Knepley #include <petscsf.h> 679bdfe76SSatish Balay 74a2ae208SSatish Balay #undef __FUNCT__ 8985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 9985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 107843d17aSBarry Smith { 117843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12dfbe8321SBarry Smith PetscErrorCode ierr; 13985db425SBarry Smith PetscInt i,*idxb = 0; 1487828ca2SBarry Smith PetscScalar *va,*vb; 157843d17aSBarry Smith Vec vtmp; 167843d17aSBarry Smith 177843d17aSBarry Smith PetscFunctionBegin; 18985db425SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 191ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 20985db425SBarry Smith if (idx) { 2126fbe8dcSKarl Rupp for (i=0; i<A->rmap->n; i++) { 2226fbe8dcSKarl Rupp if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2326fbe8dcSKarl Rupp } 24985db425SBarry Smith } 257843d17aSBarry Smith 26d0f46423SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 27785e854fSJed Brown if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 28985db425SBarry Smith ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 291ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 307843d17aSBarry Smith 31d0f46423SBarry Smith for (i=0; i<A->rmap->n; i++) { 3226fbe8dcSKarl Rupp if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3326fbe8dcSKarl Rupp va[i] = vb[i]; 3426fbe8dcSKarl Rupp if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 3526fbe8dcSKarl Rupp } 367843d17aSBarry Smith } 377843d17aSBarry Smith 381ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 391ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 40c31cb41cSBarry Smith ierr = 
PetscFree(idxb);CHKERRQ(ierr); 416bf464f9SBarry Smith ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 427843d17aSBarry Smith PetscFunctionReturn(0); 437843d17aSBarry Smith } 447843d17aSBarry Smith 454a2ae208SSatish Balay #undef __FUNCT__ 464a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 477087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 487fc3c18eSBarry Smith { 497fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50dfbe8321SBarry Smith PetscErrorCode ierr; 517fc3c18eSBarry Smith 527fc3c18eSBarry Smith PetscFunctionBegin; 537fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 547fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 557fc3c18eSBarry Smith PetscFunctionReturn(0); 567fc3c18eSBarry Smith } 577fc3c18eSBarry Smith 584a2ae208SSatish Balay #undef __FUNCT__ 594a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 607087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 617fc3c18eSBarry Smith { 627fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 63dfbe8321SBarry Smith PetscErrorCode ierr; 647fc3c18eSBarry Smith 657fc3c18eSBarry Smith PetscFunctionBegin; 667fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 677fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 687fc3c18eSBarry Smith PetscFunctionReturn(0); 697fc3c18eSBarry Smith } 707fc3c18eSBarry Smith 71537820f0SBarry Smith /* 72537820f0SBarry Smith Local utility routine that creates a mapping from the global column 7357b952d6SSatish Balay number to the local number in the off-diagonal part of the local 74e06f6af7SJed Brown storage of the matrix. This is done in a non scalable way since the 7557b952d6SSatish Balay length of colmap equals the global matrix length. 
7657b952d6SSatish Balay */ 774a2ae208SSatish Balay #undef __FUNCT__ 78ab9863d7SBarry Smith #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 79ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 8057b952d6SSatish Balay { 8157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 836849ba73SBarry Smith PetscErrorCode ierr; 84d0f46423SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 8557b952d6SSatish Balay 86d64ed03dSBarry Smith PetscFunctionBegin; 87aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 88e23dfa41SBarry Smith ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 8948e59246SSatish Balay for (i=0; i<nbs; i++) { 903861aac3SJed Brown ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 9148e59246SSatish Balay } 9248e59246SSatish Balay #else 93785e854fSJed Brown ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr); 943bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 95b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 96928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9748e59246SSatish Balay #endif 983a40ed3dSBarry Smith PetscFunctionReturn(0); 9957b952d6SSatish Balay } 10057b952d6SSatish Balay 101f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 10280c1aa95SSatish Balay { \ 10380c1aa95SSatish Balay \ 10480c1aa95SSatish Balay brow = row/bs; \ 10580c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 106ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10780c1aa95SSatish Balay bcol = col/bs; \ 10880c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 109ab26458aSBarry Smith low = 0; high = nrow; \ 110ab26458aSBarry Smith while (high-low > 3) { \ 111ab26458aSBarry Smith t = 
(low+high)/2; \ 112ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 113ab26458aSBarry Smith else low = t; \ 114ab26458aSBarry Smith } \ 115ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11680c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11780c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11880c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 119eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 120eada6651SSatish Balay else *bap = value; \ 121ac7a638eSSatish Balay goto a_noinsert; \ 12280c1aa95SSatish Balay } \ 12380c1aa95SSatish Balay } \ 12489280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 125e32f2f54SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 126fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 12780c1aa95SSatish Balay N = nrow++ - 1; \ 12880c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 12980c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 13080c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1313eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 13280c1aa95SSatish Balay } \ 1333eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 13480c1aa95SSatish Balay rp[_i] = bcol; \ 13580c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 136ac7a638eSSatish Balay a_noinsert:; \ 13780c1aa95SSatish Balay ailen[brow] = nrow; \ 13880c1aa95SSatish Balay } 13957b952d6SSatish Balay 140ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 141ac7a638eSSatish Balay { \ 142ac7a638eSSatish Balay brow = row/bs; \ 143ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 144ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 145ac7a638eSSatish Balay bcol = col/bs; \ 146ac7a638eSSatish Balay 
ridx = row % bs; cidx = col % bs; \ 147ac7a638eSSatish Balay low = 0; high = nrow; \ 148ac7a638eSSatish Balay while (high-low > 3) { \ 149ac7a638eSSatish Balay t = (low+high)/2; \ 150ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 151ac7a638eSSatish Balay else low = t; \ 152ac7a638eSSatish Balay } \ 153ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 154ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 155ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 156ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 157ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 158ac7a638eSSatish Balay else *bap = value; \ 159ac7a638eSSatish Balay goto b_noinsert; \ 160ac7a638eSSatish Balay } \ 161ac7a638eSSatish Balay } \ 16289280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 163e32f2f54SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 164fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 165ac7a638eSSatish Balay N = nrow++ - 1; \ 166ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 167ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 168ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 1693eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 170ac7a638eSSatish Balay } \ 1713eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 172ac7a638eSSatish Balay rp[_i] = bcol; \ 173ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 174ac7a638eSSatish Balay b_noinsert:; \ 175ac7a638eSSatish Balay bilen[brow] = nrow; \ 176ac7a638eSSatish Balay } 177ac7a638eSSatish Balay 1784a2ae208SSatish Balay #undef __FUNCT__ 1794a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 180b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const 
PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 18157b952d6SSatish Balay { 18257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 18393fea6afSBarry Smith MatScalar value; 184ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 185dfbe8321SBarry Smith PetscErrorCode ierr; 186b24ad042SBarry Smith PetscInt i,j,row,col; 187d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 188d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 189d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 19057b952d6SSatish Balay 191eada6651SSatish Balay /* Some Variables required in the macro */ 19280c1aa95SSatish Balay Mat A = baij->A; 19380c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 194b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 1953eda8832SBarry Smith MatScalar *aa =a->a; 196ac7a638eSSatish Balay 197ac7a638eSSatish Balay Mat B = baij->B; 198ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 199b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 2003eda8832SBarry Smith MatScalar *ba =b->a; 201ac7a638eSSatish Balay 202b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 203b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 2043eda8832SBarry Smith MatScalar *ap,*bap; 20580c1aa95SSatish Balay 206d64ed03dSBarry Smith PetscFunctionBegin; 20757b952d6SSatish Balay for (i=0; i<m; i++) { 2085ef9f2a5SBarry Smith if (im[i] < 0) continue; 2092515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 210e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 211639f9d9dSBarry Smith #endif 21257b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 21357b952d6SSatish Balay row = im[i] - rstart_orig; 21457b952d6SSatish Balay for (j=0; j<n; j++) { 21557b952d6SSatish 
Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { 21657b952d6SSatish Balay col = in[j] - cstart_orig; 217db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 218db4deed7SKarl Rupp else value = v[i+j*m]; 219f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22173959e64SBarry Smith } else if (in[j] < 0) continue; 2222515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 223660746e0SBarry Smith else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 224639f9d9dSBarry Smith #endif 22557b952d6SSatish Balay else { 22657b952d6SSatish Balay if (mat->was_assembled) { 227905e6a2fSBarry Smith if (!baij->colmap) { 228ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 229905e6a2fSBarry Smith } 230aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 232bba1ac68SSatish Balay col = col - 1; 23348e59246SSatish Balay #else 234bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 23548e59246SSatish Balay #endif 236c9ef50b2SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 237ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 2388295de27SSatish Balay col = in[j]; 2399bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2409bf004c3SSatish Balay B = baij->B; 2419bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 2429bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2439bf004c3SSatish Balay ba =b->a; 244c9ef50b2SBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 245c9ef50b2SBarry Smith else col += in[j]%bs; 2468295de27SSatish Balay } else col = in[j]; 
247db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 248db4deed7SKarl Rupp else value = v[i+j*m]; 24990da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 25090da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25157b952d6SSatish Balay } 25257b952d6SSatish Balay } 253d64ed03dSBarry Smith } else { 2544cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 25590f02eecSBarry Smith if (!baij->donotstash) { 2565080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 257ff2fd236SBarry Smith if (roworiented) { 258b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 259ff2fd236SBarry Smith } else { 260b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 26157b952d6SSatish Balay } 26257b952d6SSatish Balay } 26357b952d6SSatish Balay } 26490f02eecSBarry Smith } 2653a40ed3dSBarry Smith PetscFunctionReturn(0); 26657b952d6SSatish Balay } 26757b952d6SSatish Balay 2684a2ae208SSatish Balay #undef __FUNCT__ 26997e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 27097e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 271ab26458aSBarry Smith { 272ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 273dd6ea824SBarry Smith const PetscScalar *value; 274f15d580aSBarry Smith MatScalar *barray = baij->barray; 275ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 276dfbe8321SBarry Smith PetscErrorCode ierr; 277899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 278899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 279d0f46423SBarry Smith PetscInt 
cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 280ab26458aSBarry Smith 281b16ae2b1SBarry Smith PetscFunctionBegin; 28230793edcSSatish Balay if (!barray) { 283785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 28482502324SSatish Balay baij->barray = barray; 28530793edcSSatish Balay } 28630793edcSSatish Balay 28726fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 28826fbe8dcSKarl Rupp else stepval = (m-1)*bs; 28926fbe8dcSKarl Rupp 290ab26458aSBarry Smith for (i=0; i<m; i++) { 2915ef9f2a5SBarry Smith if (im[i] < 0) continue; 2922515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 293e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 294ab26458aSBarry Smith #endif 295ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 296ab26458aSBarry Smith row = im[i] - rstart; 297ab26458aSBarry Smith for (j=0; j<n; j++) { 29815b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 29915b57d14SSatish Balay if ((roworiented) && (n == 1)) { 300f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 30115b57d14SSatish Balay } else if ((!roworiented) && (m == 1)) { 302f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 30315b57d14SSatish Balay } else { /* Here a copy is required */ 304ab26458aSBarry Smith if (roworiented) { 30553ef36baSBarry Smith value = v + (i*(stepval+bs) + j)*bs; 306ab26458aSBarry Smith } else { 30753ef36baSBarry Smith value = v + (j*(stepval+bs) + i)*bs; 308abef11f7SSatish Balay } 30953ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 31026fbe8dcSKarl Rupp for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 31153ef36baSBarry Smith barray += bs; 31247513183SBarry Smith } 31330793edcSSatish Balay barray -= bs2; 31415b57d14SSatish Balay } 315abef11f7SSatish Balay 316abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend) { 317abef11f7SSatish Balay col = in[j] - cstart; 31897e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 31926fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 3202515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 321cb9801acSJed Brown else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 322ab26458aSBarry Smith #endif 323ab26458aSBarry Smith else { 324ab26458aSBarry Smith if (mat->was_assembled) { 325ab26458aSBarry Smith if (!baij->colmap) { 326ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 327ab26458aSBarry Smith } 328a5eb4965SSatish Balay 3292515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 330aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 331b24ad042SBarry Smith { PetscInt data; 3320f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 333e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 334fa46199cSSatish Balay } 33548e59246SSatish Balay #else 336e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 337a5eb4965SSatish Balay #endif 33848e59246SSatish Balay #endif 339aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 3400f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 341fa46199cSSatish Balay col = (col - 1)/bs; 34248e59246SSatish Balay #else 343a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 34448e59246SSatish Balay #endif 3450e9bae81SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 346ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 347ab26458aSBarry Smith col = in[j]; 3480e9bae81SBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 349db4deed7SKarl Rupp } else col = in[j]; 35097e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 351ab26458aSBarry Smith } 352ab26458aSBarry Smith } 353d64ed03dSBarry Smith } else { 3544cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 355ab26458aSBarry Smith if (!baij->donotstash) { 356ff2fd236SBarry Smith if (roworiented) { 3576fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 358ff2fd236SBarry Smith } else { 3596fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 360ff2fd236SBarry Smith } 361abef11f7SSatish Balay } 362ab26458aSBarry Smith } 363ab26458aSBarry Smith } 3643a40ed3dSBarry Smith PetscFunctionReturn(0); 365ab26458aSBarry Smith } 3666fa18ffdSBarry Smith 3670bdbc534SSatish Balay #define HASH_KEY 0.6180339887 368b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 369b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 370b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 3714a2ae208SSatish Balay #undef __FUNCT__ 37297e5c40aSBarry Smith #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 37397e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 3740bdbc534SSatish Balay { 3750bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 376ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 377dfbe8321SBarry Smith PetscErrorCode ierr; 378b24ad042SBarry Smith PetscInt i,j,row,col; 379d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 380d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 381d0f46423SBarry Smith PetscInt 
h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 382329f5518SBarry Smith PetscReal tmp; 3833eda8832SBarry Smith MatScalar **HD = baij->hd,value; 3842515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 385b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 3864a15367fSSatish Balay #endif 3870bdbc534SSatish Balay 3880bdbc534SSatish Balay PetscFunctionBegin; 3890bdbc534SSatish Balay for (i=0; i<m; i++) { 3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 391e32f2f54SBarry Smith if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 392e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 3930bdbc534SSatish Balay #endif 3940bdbc534SSatish Balay row = im[i]; 395c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 3960bdbc534SSatish Balay for (j=0; j<n; j++) { 3970bdbc534SSatish Balay col = in[j]; 398db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 399db4deed7SKarl Rupp else value = v[i+j*m]; 400b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 401c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 402c2760754SSatish Balay h1 = HASH(size,key,tmp); 4030bdbc534SSatish Balay 404c2760754SSatish Balay 405c2760754SSatish Balay idx = h1; 4062515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 407187ce0cbSSatish Balay insert_ct++; 408187ce0cbSSatish Balay total_ct++; 409187ce0cbSSatish Balay if (HT[idx] != key) { 410187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 411187ce0cbSSatish Balay if (idx == size) { 412187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 413f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 414187ce0cbSSatish Balay } 415187ce0cbSSatish Balay } 416187ce0cbSSatish Balay #else 417c2760754SSatish Balay if (HT[idx] 
!= key) { 418c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 419c2760754SSatish Balay if (idx == size) { 420c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 421f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 422c2760754SSatish Balay } 423c2760754SSatish Balay } 424187ce0cbSSatish Balay #endif 425c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 426c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 427c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4280bdbc534SSatish Balay } 42926fbe8dcSKarl Rupp } else if (!baij->donotstash) { 430ff2fd236SBarry Smith if (roworiented) { 431b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 432ff2fd236SBarry Smith } else { 433b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 4340bdbc534SSatish Balay } 4350bdbc534SSatish Balay } 4360bdbc534SSatish Balay } 4372515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 438187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 439187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 440187ce0cbSSatish Balay #endif 4410bdbc534SSatish Balay PetscFunctionReturn(0); 4420bdbc534SSatish Balay } 4430bdbc534SSatish Balay 4444a2ae208SSatish Balay #undef __FUNCT__ 44597e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 44697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 4470bdbc534SSatish Balay { 4480bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 449ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 450dfbe8321SBarry Smith PetscErrorCode ierr; 451b24ad042SBarry 
Smith PetscInt i,j,ii,jj,row,col; 452899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 453d0f46423SBarry Smith PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 454b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 455329f5518SBarry Smith PetscReal tmp; 4563eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 457dd6ea824SBarry Smith const PetscScalar *v_t,*value; 4582515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 459b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4604a15367fSSatish Balay #endif 4610bdbc534SSatish Balay 462d0a41580SSatish Balay PetscFunctionBegin; 46326fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 46426fbe8dcSKarl Rupp else stepval = (m-1)*bs; 46526fbe8dcSKarl Rupp 4660bdbc534SSatish Balay for (i=0; i<m; i++) { 4672515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 468e32f2f54SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 469e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 4700bdbc534SSatish Balay #endif 4710bdbc534SSatish Balay row = im[i]; 472ab715e2cSSatish Balay v_t = v + i*nbs2; 473c2760754SSatish Balay if (row >= rstart && row < rend) { 4740bdbc534SSatish Balay for (j=0; j<n; j++) { 4750bdbc534SSatish Balay col = in[j]; 4760bdbc534SSatish Balay 4770bdbc534SSatish Balay /* Look up into the Hash Table */ 478c2760754SSatish Balay key = row*Nbs+col+1; 479c2760754SSatish Balay h1 = HASH(size,key,tmp); 4800bdbc534SSatish Balay 481c2760754SSatish Balay idx = h1; 4822515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 483187ce0cbSSatish Balay total_ct++; 484187ce0cbSSatish Balay insert_ct++; 485187ce0cbSSatish Balay if (HT[idx] != key) { 486187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 487187ce0cbSSatish Balay if (idx == size) { 
488187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 489f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 490187ce0cbSSatish Balay } 491187ce0cbSSatish Balay } 492187ce0cbSSatish Balay #else 493c2760754SSatish Balay if (HT[idx] != key) { 494c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 495c2760754SSatish Balay if (idx == size) { 496c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 497f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498c2760754SSatish Balay } 499c2760754SSatish Balay } 500187ce0cbSSatish Balay #endif 501c2760754SSatish Balay baij_a = HD[idx]; 5020bdbc534SSatish Balay if (roworiented) { 503c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 504187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 505187ce0cbSSatish Balay value = v_t; 506187ce0cbSSatish Balay v_t += bs; 507fef45726SSatish Balay if (addv == ADD_VALUES) { 508c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 509c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 510fef45726SSatish Balay baij_a[jj] += *value++; 511b4cc0f5aSSatish Balay } 512b4cc0f5aSSatish Balay } 513fef45726SSatish Balay } else { 514c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 515c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 516fef45726SSatish Balay baij_a[jj] = *value++; 517fef45726SSatish Balay } 518fef45726SSatish Balay } 519fef45726SSatish Balay } 5200bdbc534SSatish Balay } else { 5210bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 522fef45726SSatish Balay if (addv == ADD_VALUES) { 523b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 5240bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 525fef45726SSatish Balay baij_a[jj] += *value++; 526fef45726SSatish Balay } 
527fef45726SSatish Balay } 528fef45726SSatish Balay } else { 529fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 530fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 531fef45726SSatish Balay baij_a[jj] = *value++; 532fef45726SSatish Balay } 533b4cc0f5aSSatish Balay } 5340bdbc534SSatish Balay } 5350bdbc534SSatish Balay } 5360bdbc534SSatish Balay } 5370bdbc534SSatish Balay } else { 5380bdbc534SSatish Balay if (!baij->donotstash) { 5390bdbc534SSatish Balay if (roworiented) { 5408798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5410bdbc534SSatish Balay } else { 5428798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5430bdbc534SSatish Balay } 5440bdbc534SSatish Balay } 5450bdbc534SSatish Balay } 5460bdbc534SSatish Balay } 5472515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 548187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 549187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 550187ce0cbSSatish Balay #endif 5510bdbc534SSatish Balay PetscFunctionReturn(0); 5520bdbc534SSatish Balay } 553133cdb44SSatish Balay 5544a2ae208SSatish Balay #undef __FUNCT__ 5554a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 556b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 557d6de1c52SSatish Balay { 558d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5596849ba73SBarry Smith PetscErrorCode ierr; 560d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 561d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 562d6de1c52SSatish Balay 563133cdb44SSatish Balay PetscFunctionBegin; 564d6de1c52SSatish Balay for (i=0; i<m; i++) { 565e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 566e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 567d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 568d6de1c52SSatish Balay row = idxm[i] - bsrstart; 569d6de1c52SSatish Balay for (j=0; j<n; j++) { 570e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 571e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 572d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 573d6de1c52SSatish Balay col = idxn[j] - bscstart; 57498dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575d64ed03dSBarry Smith } else { 576905e6a2fSBarry Smith if (!baij->colmap) { 577ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 578905e6a2fSBarry Smith } 579aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 5800f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 581fa46199cSSatish Balay data--; 58248e59246SSatish Balay #else 58348e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 58448e59246SSatish Balay #endif 58548e59246SSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 586d9d09a02SSatish Balay else { 58748e59246SSatish Balay col = data + idxn[j]%bs; 58898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589d6de1c52SSatish Balay } 590d6de1c52SSatish Balay } 591d6de1c52SSatish Balay } 592f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 593d6de1c52SSatish Balay } 5943a40ed3dSBarry Smith PetscFunctionReturn(0); 595d6de1c52SSatish Balay } 
596d6de1c52SSatish Balay 5974a2ae208SSatish Balay #undef __FUNCT__ 5984a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 599dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 600d6de1c52SSatish Balay { 601d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 602d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 603dfbe8321SBarry Smith PetscErrorCode ierr; 604d0f46423SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 605329f5518SBarry Smith PetscReal sum = 0.0; 6063eda8832SBarry Smith MatScalar *v; 607d6de1c52SSatish Balay 608d64ed03dSBarry Smith PetscFunctionBegin; 609d6de1c52SSatish Balay if (baij->size == 1) { 610064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 611d6de1c52SSatish Balay } else { 612d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 613d6de1c52SSatish Balay v = amat->a; 6148a62d963SHong Zhang nz = amat->nz*bs2; 6158a62d963SHong Zhang for (i=0; i<nz; i++) { 616329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 617d6de1c52SSatish Balay } 618d6de1c52SSatish Balay v = bmat->a; 6198a62d963SHong Zhang nz = bmat->nz*bs2; 6208a62d963SHong Zhang for (i=0; i<nz; i++) { 621329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 622d6de1c52SSatish Balay } 623ce94432eSBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6248f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 6258a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 6268a62d963SHong Zhang PetscReal *tmp,*tmp2; 627899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 628dcca6d9dSJed Brown ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 629d0f46423SBarry Smith ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 6308a62d963SHong Zhang v = amat->a; jj = amat->j; 6318a62d963SHong 
Zhang for (i=0; i<amat->nz; i++) { 6328a62d963SHong Zhang for (j=0; j<bs; j++) { 6338a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 6348a62d963SHong Zhang for (row=0; row<bs; row++) { 6358a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6368a62d963SHong Zhang } 6378a62d963SHong Zhang } 6388a62d963SHong Zhang jj++; 6398a62d963SHong Zhang } 6408a62d963SHong Zhang v = bmat->a; jj = bmat->j; 6418a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 6428a62d963SHong Zhang for (j=0; j<bs; j++) { 6438a62d963SHong Zhang col = bs*garray[*jj] + j; 6448a62d963SHong Zhang for (row=0; row<bs; row++) { 6458a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6468a62d963SHong Zhang } 6478a62d963SHong Zhang } 6488a62d963SHong Zhang jj++; 6498a62d963SHong Zhang } 650ce94432eSBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6518a62d963SHong Zhang *nrm = 0.0; 652d0f46423SBarry Smith for (j=0; j<mat->cmap->N; j++) { 6538a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 6548a62d963SHong Zhang } 655fca92195SBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 6568a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 657577dd1f9SKris Buschelman PetscReal *sums; 658785e854fSJed Brown ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 6598a62d963SHong Zhang sum = 0.0; 6608a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 6618a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 6628a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 6638a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 6648a62d963SHong Zhang for (i=0; i<nz; i++) { 6658a62d963SHong Zhang for (col=0; col<bs; col++) { 6668a62d963SHong Zhang for (row=0; row<bs; row++) { 6678a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 6688a62d963SHong Zhang } 6698a62d963SHong Zhang } 6708a62d963SHong Zhang } 6718a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 6728a62d963SHong Zhang nz = 
bmat->i[j+1]-bmat->i[j]; 6738a62d963SHong Zhang for (i=0; i<nz; i++) { 6748a62d963SHong Zhang for (col=0; col<bs; col++) { 6758a62d963SHong Zhang for (row=0; row<bs; row++) { 6768a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 6778a62d963SHong Zhang } 6788a62d963SHong Zhang } 6798a62d963SHong Zhang } 6808a62d963SHong Zhang for (row=0; row<bs; row++) { 6818a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 6828a62d963SHong Zhang } 6838a62d963SHong Zhang } 684ce94432eSBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 685577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 686ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 687d64ed03dSBarry Smith } 6883a40ed3dSBarry Smith PetscFunctionReturn(0); 689d6de1c52SSatish Balay } 69057b952d6SSatish Balay 691fef45726SSatish Balay /* 692fef45726SSatish Balay Creates the hash table, and sets the table 693fef45726SSatish Balay This table is created only once. 694fef45726SSatish Balay If new entried need to be added to the matrix 695fef45726SSatish Balay then the hash table has to be destroyed and 696fef45726SSatish Balay recreated. 
*/
/*
   MatCreateHashTable_MPIBAIJ_Private - implementation notes

   Input Parameters:
+  mat    - the matrix
-  factor - the table gets (PetscInt)(factor*nz) slots, where nz is the sum of
            the nz counts of the diagonal (A) and off-diagonal (B) parts

   Returns immediately when baij->ht is already set.  baij->ht holds the keys
   and baij->hd the address of the first value of the corresponding block
   (a->a + j*bs2 or b->a + j*bs2).  Both arrays are allocated zeroed, a zero
   key marks a free slot, and keys are row*Nbs + col + 1 in global block
   indices.  Collisions are resolved by probing the slots (h1+k)%ht_size for
   k = 0,1,...

   NOTE(review): when the probe loop finds no free slot the block is not
   entered in the table and no error is raised.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp; /* scratch argument handed to the HASH() macro */
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0; /* occupied slots probed / largest probe index seen */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global block row/column of the j-th stored block of A */
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      /* garray maps the local block column of B to its global block column */
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  /* j counts the occupied slots */
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

/*
   MatAssemblyBegin_MPIBAIJ - Starts the communication of stashed entries.

   Input Parameters:
+  mat  - the matrix
-  mode - assembly type (not referenced in this routine)

   Does nothing when baij->donotstash or mat->nooffprocentries is set.
   Otherwise the insert modes of all processes are OR-ed together, an error is
   raised if both ADD_VALUES and INSERT_VALUES were used, and the scatters of
   the scalar stash (mat->stash) and of the block stash (mat->bstash) are
   started.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_MPIBAIJ - Completes the assembly started by
   MatAssemblyBegin_MPIBAIJ().

   Input Parameters:
+  mat  - the matrix
-  mode - assembly type; several steps below run only for MAT_FINAL_ASSEMBLY

   Steps, in order:
   1. unless stashing is disabled, drain the scalar stash and then the block
      stash, inserting each run of entries that share a row with one call;
   2. assemble the diagonal part baij->A;
   3. unless B's nonew flag is set, agree across processes whether anybody has
      disassembled and, if so, disassemble locally as well;
   4. on the first final assembly build the multiply data structures, then
      assemble the off-diagonal part baij->B;
   5. optionally create the hash table and switch the set-values function
      pointers to the _HT variants;
   6. free the row work arrays and update mat->nonzerostate.

   NOTE(review): under PETSC_USE_INFO, baij->ht_insert_ct is used as a divisor
   without a check for zero.
*/
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* scalar stash: keep fetching messages until flg comes back zero */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* the product of the was_assembled flags is nonzero only if every process is still assembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* hash table requested but not built yet: build it and route insertions through it */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the matrix through an ASCII,
   draw or other (non-binary) viewer.

   Input Parameters:
+  mat    - the matrix
-  viewer - the viewer

   For an ASCII viewer the formats PETSC_VIEWER_ASCII_INFO_DETAIL,
   PETSC_VIEWER_ASCII_INFO and PETSC_VIEWER_ASCII_FACTOR_INFO are handled
   first and return early.  For a draw viewer whose PetscDraw is null the
   routine also returns early.  In all remaining cases a temporary MATMPIBAIJ
   matrix that places every row on rank 0 is filled with the local A and B
   parts, assembled, and rank 0 views its diagonal block through a singleton
   viewer using MatView_SeqBAIJ().
*/
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process statistics followed by the scatter context */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals[] = the bs consecutive global rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* insert the block one column (bs consecutive stored values) at a time */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray gives the global block column of B's local block column */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* give the sequential block the name of mat before viewing it */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B
= (Mat_SeqBAIJ*)a->B->data; 1055660746e0SBarry Smith PetscErrorCode ierr; 10565f48b12bSBarry Smith PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1057e96a6426SSatish Balay PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1058660746e0SBarry Smith int fd; 1059660746e0SBarry Smith PetscScalar *column_values; 1060660746e0SBarry Smith FILE *file; 1061660746e0SBarry Smith PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1062638eb2ebSBarry Smith PetscInt message_count,flowcontrolcount; 1063660746e0SBarry Smith 1064660746e0SBarry Smith PetscFunctionBegin; 1065ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1066ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1067660746e0SBarry Smith nz = bs2*(A->nz + B->nz); 1068660746e0SBarry Smith rlen = mat->rmap->n; 1069660746e0SBarry Smith if (!rank) { 1070660746e0SBarry Smith header[0] = MAT_FILE_CLASSID; 1071660746e0SBarry Smith header[1] = mat->rmap->N; 1072660746e0SBarry Smith header[2] = mat->cmap->N; 107326fbe8dcSKarl Rupp 1074ce94432eSBarry Smith ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1075660746e0SBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1076660746e0SBarry Smith ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1077660746e0SBarry Smith /* get largest number of rows any processor has */ 1078660746e0SBarry Smith range = mat->rmap->range; 1079660746e0SBarry Smith for (i=1; i<size; i++) { 1080660746e0SBarry Smith rlen = PetscMax(rlen,range[i+1] - range[i]); 1081660746e0SBarry Smith } 1082660746e0SBarry Smith } else { 1083ce94432eSBarry Smith ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1084660746e0SBarry Smith } 1085660746e0SBarry Smith 1086785e854fSJed 
Brown ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr); 1087660746e0SBarry Smith /* compute lengths of each row */ 1088660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1089660746e0SBarry Smith crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1090660746e0SBarry Smith } 1091660746e0SBarry Smith /* store the row lengths to the file */ 1092638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093660746e0SBarry Smith if (!rank) { 1094660746e0SBarry Smith MPI_Status status; 1095785e854fSJed Brown ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1096660746e0SBarry Smith rlen = (range[1] - range[0])/bs; 1097660746e0SBarry Smith for (i=0; i<rlen; i++) { 1098660746e0SBarry Smith for (j=0; j<bs; j++) { 1099660746e0SBarry Smith row_lens[i*bs+j] = bs*crow_lens[i]; 1100660746e0SBarry Smith } 1101660746e0SBarry Smith } 1102660746e0SBarry Smith ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1103660746e0SBarry Smith for (i=1; i<size; i++) { 1104660746e0SBarry Smith rlen = (range[i+1] - range[i])/bs; 1105639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1106ce94432eSBarry Smith ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1107660746e0SBarry Smith for (k=0; k<rlen; k++) { 1108660746e0SBarry Smith for (j=0; j<bs; j++) { 1109660746e0SBarry Smith row_lens[k*bs+j] = bs*crow_lens[k]; 1110660746e0SBarry Smith } 1111660746e0SBarry Smith } 1112660746e0SBarry Smith ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1113660746e0SBarry Smith } 1114639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1115660746e0SBarry Smith ierr = PetscFree(row_lens);CHKERRQ(ierr); 1116660746e0SBarry Smith } else { 1117639ff905SBarry Smith ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1118ce94432eSBarry Smith ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1119639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1120660746e0SBarry Smith } 1121660746e0SBarry Smith ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1122660746e0SBarry Smith 1123660746e0SBarry Smith /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1124660746e0SBarry Smith information needed to make it for each row from a block row. This does require more communication but still not more than 1125660746e0SBarry Smith the communication needed for the nonzero values */ 1126660746e0SBarry Smith nzmax = nz; /* space a largest processor needs */ 1127ce94432eSBarry Smith ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1128785e854fSJed Brown ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1129660746e0SBarry Smith cnt = 0; 1130660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1131660746e0SBarry Smith pcnt = cnt; 1132660746e0SBarry Smith for (j=B->i[i]; j<B->i[i+1]; j++) { 1133660746e0SBarry Smith if ((col = garray[B->j[j]]) > cstart) break; 1134660746e0SBarry Smith for (l=0; l<bs; l++) { 1135660746e0SBarry Smith column_indices[cnt++] = bs*col+l; 1136660746e0SBarry Smith } 1137660746e0SBarry Smith } 1138660746e0SBarry Smith for (k=A->i[i]; k<A->i[i+1]; k++) { 1139660746e0SBarry Smith for (l=0; l<bs; l++) { 1140660746e0SBarry Smith column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1141660746e0SBarry Smith } 1142660746e0SBarry Smith } 1143660746e0SBarry Smith for (; j<B->i[i+1]; j++) { 1144660746e0SBarry Smith for (l=0; l<bs; l++) { 1145660746e0SBarry Smith column_indices[cnt++] = bs*garray[B->j[j]]+l; 1146660746e0SBarry Smith } 1147660746e0SBarry Smith } 1148660746e0SBarry Smith len = cnt - 
pcnt; 1149660746e0SBarry Smith for (k=1; k<bs; k++) { 1150660746e0SBarry Smith ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1151660746e0SBarry Smith cnt += len; 1152660746e0SBarry Smith } 1153660746e0SBarry Smith } 1154660746e0SBarry Smith if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1155660746e0SBarry Smith 1156660746e0SBarry Smith /* store the columns to the file */ 1157638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1158660746e0SBarry Smith if (!rank) { 1159660746e0SBarry Smith MPI_Status status; 1160660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1161660746e0SBarry Smith for (i=1; i<size; i++) { 1162639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1163ce94432eSBarry Smith ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1164ce94432eSBarry Smith ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1165660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166660746e0SBarry Smith } 1167639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1168660746e0SBarry Smith } else { 1169639ff905SBarry Smith ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1170ce94432eSBarry Smith ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1171ce94432eSBarry Smith ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1173660746e0SBarry Smith } 1174660746e0SBarry Smith ierr = 
PetscFree(column_indices);CHKERRQ(ierr); 1175660746e0SBarry Smith 1176660746e0SBarry Smith /* load up the numerical values */ 1177785e854fSJed Brown ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1178660746e0SBarry Smith cnt = 0; 1179660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1180660746e0SBarry Smith rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1181660746e0SBarry Smith for (j=B->i[i]; j<B->i[i+1]; j++) { 1182660746e0SBarry Smith if (garray[B->j[j]] > cstart) break; 1183660746e0SBarry Smith for (l=0; l<bs; l++) { 1184660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1185660746e0SBarry Smith column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1186660746e0SBarry Smith } 1187660746e0SBarry Smith } 1188660746e0SBarry Smith cnt += bs; 1189660746e0SBarry Smith } 1190660746e0SBarry Smith for (k=A->i[i]; k<A->i[i+1]; k++) { 1191660746e0SBarry Smith for (l=0; l<bs; l++) { 1192660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1193660746e0SBarry Smith column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1194660746e0SBarry Smith } 1195660746e0SBarry Smith } 1196660746e0SBarry Smith cnt += bs; 1197660746e0SBarry Smith } 1198660746e0SBarry Smith for (; j<B->i[i+1]; j++) { 1199660746e0SBarry Smith for (l=0; l<bs; l++) { 1200660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1201660746e0SBarry Smith column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1202660746e0SBarry Smith } 1203660746e0SBarry Smith } 1204660746e0SBarry Smith cnt += bs; 1205660746e0SBarry Smith } 1206660746e0SBarry Smith cnt += (bs-1)*rlen; 1207660746e0SBarry Smith } 1208660746e0SBarry Smith if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1209660746e0SBarry Smith 1210660746e0SBarry Smith /* store the column values to the file */ 1211638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1212660746e0SBarry Smith if (!rank) { 1213660746e0SBarry Smith MPI_Status status; 
1214660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1215660746e0SBarry Smith for (i=1; i<size; i++) { 1216639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1217ce94432eSBarry Smith ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1218ce94432eSBarry Smith ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1219660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220660746e0SBarry Smith } 1221639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222660746e0SBarry Smith } else { 1223639ff905SBarry Smith ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224ce94432eSBarry Smith ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225ce94432eSBarry Smith ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1227660746e0SBarry Smith } 1228660746e0SBarry Smith ierr = PetscFree(column_values);CHKERRQ(ierr); 1229660746e0SBarry Smith 1230660746e0SBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1231660746e0SBarry Smith if (file) { 1232660746e0SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1233660746e0SBarry Smith } 1234660746e0SBarry Smith PetscFunctionReturn(0); 1235660746e0SBarry Smith } 1236660746e0SBarry Smith 1237660746e0SBarry Smith #undef __FUNCT__ 12384a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1239dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 124057b952d6SSatish Balay { 1241dfbe8321SBarry Smith PetscErrorCode ierr; 
1242ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 124357b952d6SSatish Balay 1244d64ed03dSBarry Smith PetscFunctionBegin; 1245251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1246251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1247251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1248251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1249660746e0SBarry Smith if (iascii || isdraw || issocket) { 12507b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1251660746e0SBarry Smith } else if (isbinary) { 1252660746e0SBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 125357b952d6SSatish Balay } 12543a40ed3dSBarry Smith PetscFunctionReturn(0); 125557b952d6SSatish Balay } 125657b952d6SSatish Balay 12574a2ae208SSatish Balay #undef __FUNCT__ 12584a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1259dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 126079bdfe76SSatish Balay { 126179bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1262dfbe8321SBarry Smith PetscErrorCode ierr; 126379bdfe76SSatish Balay 1264d64ed03dSBarry Smith PetscFunctionBegin; 1265aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1266d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 126779bdfe76SSatish Balay #endif 12688798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12698798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 12706bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 12716bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1272aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 
12736bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 127448e59246SSatish Balay #else 127505b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 127648e59246SSatish Balay #endif 127705b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 12786bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 12796bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1280fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 128105b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1282fca92195SBarry Smith ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1283899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1284bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1285901853e0SKris Buschelman 1286dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1287bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1288bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1289bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1290bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1291bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1292bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1293bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1294bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1295bdf89e91SBarry Smith ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 12963a40ed3dSBarry Smith PetscFunctionReturn(0); 129779bdfe76SSatish Balay } 129879bdfe76SSatish Balay 12994a2ae208SSatish Balay #undef __FUNCT__ 13004a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1301dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1302cee3aa6bSSatish Balay { 1303cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1304dfbe8321SBarry Smith PetscErrorCode ierr; 1305b24ad042SBarry Smith PetscInt nt; 1306cee3aa6bSSatish Balay 1307d64ed03dSBarry Smith PetscFunctionBegin; 1308e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1309e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1310e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1311e7e72b3dSBarry Smith if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1312ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1313f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1314ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1315f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 13163a40ed3dSBarry Smith PetscFunctionReturn(0); 1317cee3aa6bSSatish Balay } 1318cee3aa6bSSatish Balay 13194a2ae208SSatish Balay #undef __FUNCT__ 13204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1321dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1322cee3aa6bSSatish Balay { 1323cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1324dfbe8321SBarry Smith PetscErrorCode ierr; 1325d64ed03dSBarry Smith 1326d64ed03dSBarry Smith PetscFunctionBegin; 1327ca9f406cSSatish Balay ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1329ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 1332cee3aa6bSSatish Balay } 1333cee3aa6bSSatish Balay 13344a2ae208SSatish Balay #undef __FUNCT__ 13354a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1336dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1337cee3aa6bSSatish Balay { 1338cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1339dfbe8321SBarry Smith PetscErrorCode ierr; 1340ace3abfcSBarry Smith PetscBool merged; 1341cee3aa6bSSatish Balay 1342d64ed03dSBarry Smith PetscFunctionBegin; 1343a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1344cee3aa6bSSatish Balay /* do nondiagonal part */ 13457c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1346a5ff213dSBarry Smith if (!merged) { 1347cee3aa6bSSatish Balay /* send it on its way */ 1348ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1349cee3aa6bSSatish Balay /* do local part */ 13507c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1351cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1352a5ff213dSBarry Smith /* inserted in yy until the next line */ 1353ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354a5ff213dSBarry Smith } else { 1355a5ff213dSBarry Smith /* do local part */ 1356a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357a5ff213dSBarry Smith /* send it on its way */ 
1358ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1359a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1360ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1361a5ff213dSBarry Smith } 13623a40ed3dSBarry Smith PetscFunctionReturn(0); 1363cee3aa6bSSatish Balay } 1364cee3aa6bSSatish Balay 13654a2ae208SSatish Balay #undef __FUNCT__ 13664a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1367dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1368cee3aa6bSSatish Balay { 1369cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1370dfbe8321SBarry Smith PetscErrorCode ierr; 1371cee3aa6bSSatish Balay 1372d64ed03dSBarry Smith PetscFunctionBegin; 1373cee3aa6bSSatish Balay /* do nondiagonal part */ 13747c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1375cee3aa6bSSatish Balay /* send it on its way */ 1376ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377cee3aa6bSSatish Balay /* do local part */ 13787c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1379cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1380cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1381cee3aa6bSSatish Balay /* but is not perhaps always true. 
*/ 1382ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 13833a40ed3dSBarry Smith PetscFunctionReturn(0); 1384cee3aa6bSSatish Balay } 1385cee3aa6bSSatish Balay 1386cee3aa6bSSatish Balay /* 1387cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1388cee3aa6bSSatish Balay diagonal block 1389cee3aa6bSSatish Balay */ 13904a2ae208SSatish Balay #undef __FUNCT__ 13914a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1392dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1393cee3aa6bSSatish Balay { 1394cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1395dfbe8321SBarry Smith PetscErrorCode ierr; 1396d64ed03dSBarry Smith 1397d64ed03dSBarry Smith PetscFunctionBegin; 1398e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13993a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 14003a40ed3dSBarry Smith PetscFunctionReturn(0); 1401cee3aa6bSSatish Balay } 1402cee3aa6bSSatish Balay 14034a2ae208SSatish Balay #undef __FUNCT__ 14044a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1405f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1406cee3aa6bSSatish Balay { 1407cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1408dfbe8321SBarry Smith PetscErrorCode ierr; 1409d64ed03dSBarry Smith 1410d64ed03dSBarry Smith PetscFunctionBegin; 1411f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1412f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 14133a40ed3dSBarry Smith PetscFunctionReturn(0); 1414cee3aa6bSSatish Balay } 1415026e39d0SSatish Balay 14164a2ae208SSatish Balay #undef __FUNCT__ 14174a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1418b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
**idx,PetscScalar **v) 1419acdf5bf4SSatish Balay { 1420acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 142187828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 14226849ba73SBarry Smith PetscErrorCode ierr; 1423d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1426acdf5bf4SSatish Balay 1427d64ed03dSBarry Smith PetscFunctionBegin; 1428e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1429e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1430acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1431acdf5bf4SSatish Balay 1432acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1433acdf5bf4SSatish Balay /* 1434acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 
1435acdf5bf4SSatish Balay */ 1436acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1437b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1438bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1439acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 144026fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1441acdf5bf4SSatish Balay } 1442dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443acdf5bf4SSatish Balay } 1444d9d09a02SSatish Balay lrow = row - brstart; 1445acdf5bf4SSatish Balay 1446acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1448acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451acdf5bf4SSatish Balay nztot = nzA + nzB; 1452acdf5bf4SSatish Balay 1453acdf5bf4SSatish Balay cmap = mat->garray; 1454acdf5bf4SSatish Balay if (v || idx) { 1455acdf5bf4SSatish Balay if (nztot) { 1456acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1457b24ad042SBarry Smith PetscInt imark = -1; 1458acdf5bf4SSatish Balay if (v) { 1459acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1460acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1461d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1462acdf5bf4SSatish Balay else break; 1463acdf5bf4SSatish Balay } 1464acdf5bf4SSatish Balay imark = i; 1465acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467acdf5bf4SSatish Balay } 1468acdf5bf4SSatish Balay if (idx) { 1469acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1470acdf5bf4SSatish Balay if (imark > -1) { 
1471acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1472bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1473acdf5bf4SSatish Balay } 1474acdf5bf4SSatish Balay } else { 1475acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 147626fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1477acdf5bf4SSatish Balay else break; 1478acdf5bf4SSatish Balay } 1479acdf5bf4SSatish Balay imark = i; 1480acdf5bf4SSatish Balay } 1481d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1482d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1483acdf5bf4SSatish Balay } 1484d64ed03dSBarry Smith } else { 1485d212a18eSSatish Balay if (idx) *idx = 0; 1486d212a18eSSatish Balay if (v) *v = 0; 1487d212a18eSSatish Balay } 1488acdf5bf4SSatish Balay } 1489acdf5bf4SSatish Balay *nz = nztot; 1490f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14923a40ed3dSBarry Smith PetscFunctionReturn(0); 1493acdf5bf4SSatish Balay } 1494acdf5bf4SSatish Balay 14954a2ae208SSatish Balay #undef __FUNCT__ 14964a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1497b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498acdf5bf4SSatish Balay { 1499acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1500d64ed03dSBarry Smith 1501d64ed03dSBarry Smith PetscFunctionBegin; 1502e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1503acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 15043a40ed3dSBarry Smith PetscFunctionReturn(0); 1505acdf5bf4SSatish Balay } 1506acdf5bf4SSatish Balay 15074a2ae208SSatish Balay #undef __FUNCT__ 15084a2ae208SSatish Balay #define __FUNCT__ 
"MatZeroEntries_MPIBAIJ" 1509dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 151058667388SSatish Balay { 151158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1512dfbe8321SBarry Smith PetscErrorCode ierr; 1513d64ed03dSBarry Smith 1514d64ed03dSBarry Smith PetscFunctionBegin; 151558667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 151658667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 15173a40ed3dSBarry Smith PetscFunctionReturn(0); 151858667388SSatish Balay } 15190ac07820SSatish Balay 15204a2ae208SSatish Balay #undef __FUNCT__ 15214a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1522dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 15230ac07820SSatish Balay { 15244e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 15254e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1526dfbe8321SBarry Smith PetscErrorCode ierr; 1527329f5518SBarry Smith PetscReal isend[5],irecv[5]; 15280ac07820SSatish Balay 1529d64ed03dSBarry Smith PetscFunctionBegin; 1530d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 153126fbe8dcSKarl Rupp 15324e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 153326fbe8dcSKarl Rupp 15340e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1535de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 153626fbe8dcSKarl Rupp 15374e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 153826fbe8dcSKarl Rupp 15390e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1540de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 154126fbe8dcSKarl Rupp 15420ac07820SSatish Balay if (flag == MAT_LOCAL) { 15434e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15444e220ebcSLois Curfman McInnes info->nz_allocated = 
isend[1]; 15454e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15464e220ebcSLois Curfman McInnes info->memory = isend[3]; 15474e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15480ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1549ce94432eSBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 155026fbe8dcSKarl Rupp 15514e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15524e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15534e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15544e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15554e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15560ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1557ce94432eSBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 155826fbe8dcSKarl Rupp 15594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1564ce94432eSBarry Smith } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15654e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15664e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15674e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15683a40ed3dSBarry Smith PetscFunctionReturn(0); 15690ac07820SSatish Balay } 15700ac07820SSatish Balay 15714a2ae208SSatish Balay #undef __FUNCT__ 15724a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 157458667388SSatish Balay { 157558667388SSatish Balay Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1576dfbe8321SBarry Smith PetscErrorCode ierr; 157758667388SSatish Balay 1578d64ed03dSBarry Smith PetscFunctionBegin; 157912c028f9SKris Buschelman switch (op) { 1580512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 158112c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 158228b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 158412c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 15854e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15864e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 158712c028f9SKris Buschelman break; 158812c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15894e0d8c25SBarry Smith a->roworiented = flg; 159026fbe8dcSKarl Rupp 15914e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15924e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 159312c028f9SKris Buschelman break; 15944e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1595290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 159612c028f9SKris Buschelman break; 159712c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15984e0d8c25SBarry Smith a->donotstash = flg; 159912c028f9SKris Buschelman break; 160012c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 16014e0d8c25SBarry Smith a->ht_flag = flg; 160212c028f9SKris Buschelman break; 160377e54ba9SKris Buschelman case MAT_SYMMETRIC: 160477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 16052188ac68SBarry Smith case MAT_HERMITIAN: 16062188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 16074e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 160877e54ba9SKris Buschelman break; 160912c028f9SKris Buschelman default: 1610ce94432eSBarry Smith SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1611d64ed03dSBarry Smith } 16123a40ed3dSBarry Smith PetscFunctionReturn(0); 161358667388SSatish 
Balay } 161458667388SSatish Balay 16154a2ae208SSatish Balay #undef __FUNCT__ 16166a719282SBarry Smith #define __FUNCT__ "MatTranspose_MPIBAIJ" 1617fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 16180ac07820SSatish Balay { 16190ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 16200ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 16210ac07820SSatish Balay Mat B; 1622dfbe8321SBarry Smith PetscErrorCode ierr; 1623d0f46423SBarry Smith PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1624d0f46423SBarry Smith PetscInt bs=A->rmap->bs,mbs=baij->mbs; 16253eda8832SBarry Smith MatScalar *a; 16260ac07820SSatish Balay 1627d64ed03dSBarry Smith PetscFunctionBegin; 1628ce94432eSBarry Smith if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1629fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1630ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1631d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 16327adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 16332e72b8d9SBarry Smith /* Do not know preallocation information, but must set block size */ 16340298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1635fc4dec0aSBarry Smith } else { 1636fc4dec0aSBarry Smith B = *matout; 1637fc4dec0aSBarry Smith } 16380ac07820SSatish Balay 16390ac07820SSatish Balay /* copy over the A part */ 16400ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16410ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 16430ac07820SSatish Balay 16440ac07820SSatish Balay for (i=0; i<mbs; i++) { 1645899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 164626fbe8dcSKarl 
Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 16470ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1648899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 16490ac07820SSatish Balay for (k=0; k<bs; k++) { 165097e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 165126fbe8dcSKarl Rupp 16520ac07820SSatish Balay col++; a += bs; 16530ac07820SSatish Balay } 16540ac07820SSatish Balay } 16550ac07820SSatish Balay } 16560ac07820SSatish Balay /* copy over the B part */ 16570ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16580ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16590ac07820SSatish Balay for (i=0; i<mbs; i++) { 1660899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 166126fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 16620ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16630ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16640ac07820SSatish Balay for (k=0; k<bs; k++) { 166597e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 166626fbe8dcSKarl Rupp col++; 166726fbe8dcSKarl Rupp a += bs; 16680ac07820SSatish Balay } 16690ac07820SSatish Balay } 16700ac07820SSatish Balay } 1671606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16720ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16730ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16740ac07820SSatish Balay 167526fbe8dcSKarl Rupp if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 167626fbe8dcSKarl Rupp else { 1677eb6b5d47SBarry Smith ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 16780ac07820SSatish Balay } 16793a40ed3dSBarry Smith PetscFunctionReturn(0); 16800ac07820SSatish Balay } 16810e95ebc0SSatish Balay 16824a2ae208SSatish Balay #undef __FUNCT__ 16834a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1684dfbe8321SBarry Smith PetscErrorCode 
MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16850e95ebc0SSatish Balay { 168636c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 168736c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1688dfbe8321SBarry Smith PetscErrorCode ierr; 1689b24ad042SBarry Smith PetscInt s1,s2,s3; 16900e95ebc0SSatish Balay 1691d64ed03dSBarry Smith PetscFunctionBegin; 169236c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 169336c4a09eSSatish Balay if (rr) { 169436c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1695e32f2f54SBarry Smith if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 169636c4a09eSSatish Balay /* Overlap communication with computation. */ 1697ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 169836c4a09eSSatish Balay } 16990e95ebc0SSatish Balay if (ll) { 17000e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1701e32f2f54SBarry Smith if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 17020298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 17030e95ebc0SSatish Balay } 170436c4a09eSSatish Balay /* scale the diagonal block */ 170536c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 170636c4a09eSSatish Balay 170736c4a09eSSatish Balay if (rr) { 170836c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1709ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17100298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 171136c4a09eSSatish Balay } 17123a40ed3dSBarry Smith PetscFunctionReturn(0); 17130e95ebc0SSatish Balay } 17140e95ebc0SSatish Balay 17154a2ae208SSatish Balay #undef __FUNCT__ 17164a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 17172b40b63fSBarry Smith 
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 17180ac07820SSatish Balay { 17190ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 172065a92638SMatthew G. Knepley PetscInt *owners = A->rmap->range; 172165a92638SMatthew G. Knepley PetscInt n = A->rmap->n; 172265a92638SMatthew G. Knepley PetscSF sf; 172365a92638SMatthew G. Knepley PetscInt *lrows; 172465a92638SMatthew G. Knepley PetscSFNode *rrows; 172569ea2d38SJed Brown PetscInt r, p = 0, len = 0; 17266849ba73SBarry Smith PetscErrorCode ierr; 17270ac07820SSatish Balay 1728d64ed03dSBarry Smith PetscFunctionBegin; 172965a92638SMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 1730785e854fSJed Brown ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 173165a92638SMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 1732a34163a4SJed Brown if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 173365a92638SMatthew G. Knepley for (r = 0; r < N; ++r) { 173465a92638SMatthew G. Knepley const PetscInt idx = rows[r]; 173569ea2d38SJed Brown if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 173669ea2d38SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 173769ea2d38SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 173869ea2d38SJed Brown } 1739a34163a4SJed Brown if (A->nooffproczerorows) { 1740a34163a4SJed Brown if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1741a34163a4SJed Brown lrows[len++] = idx - owners[p]; 1742a34163a4SJed Brown } else { 174365a92638SMatthew G. Knepley rrows[r].rank = p; 174465a92638SMatthew G. 
Knepley rrows[r].index = rows[r] - owners[p]; 17450ac07820SSatish Balay } 1746a34163a4SJed Brown } 1747a34163a4SJed Brown if (!A->nooffproczerorows) { 174865a92638SMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 174965a92638SMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 175065a92638SMatthew G. Knepley /* Collect flags for rows to be zeroed */ 175165a92638SMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 175265a92638SMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 175365a92638SMatthew G. Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 175465a92638SMatthew G. Knepley /* Compress and put in row numbers */ 175565a92638SMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1756a34163a4SJed Brown } 175797b48c8fSBarry Smith /* fix right hand side if needed */ 175897b48c8fSBarry Smith if (x && b) { 175965a92638SMatthew G. Knepley const PetscScalar *xx; 176065a92638SMatthew G. Knepley PetscScalar *bb; 176165a92638SMatthew G. Knepley 176297b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 176397b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 176465a92638SMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 176597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 176697b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 176797b48c8fSBarry Smith } 176897b48c8fSBarry Smith 17690ac07820SSatish Balay /* actually zap the local rows */ 177072dacd9aSBarry Smith /* 177172dacd9aSBarry Smith Zero the required rows. 
If the "diagonal block" of the matrix 1772a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 177372dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 177472dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 177572dacd9aSBarry Smith 177672dacd9aSBarry Smith */ 17779c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1778a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1779d0f46423SBarry Smith if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1780a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1781f4df32b1SMatthew Knepley } else if (diag != 0.0) { 178265a92638SMatthew G. Knepley ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1783e7e72b3dSBarry Smith if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1784512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 178565a92638SMatthew G. Knepley for (r = 0; r < len; ++r) { 178665a92638SMatthew G. 
Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 1787f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1788a07cd24cSSatish Balay } 1789a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1790a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17919c957beeSSatish Balay } else { 1792a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1793a07cd24cSSatish Balay } 1794606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 17954f9cfa9eSBarry Smith 17964f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 17974f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1798e56f5c9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 179909e82e2bSBarry Smith ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1800e56f5c9eSBarry Smith } 18013a40ed3dSBarry Smith PetscFunctionReturn(0); 18020ac07820SSatish Balay } 180372dacd9aSBarry Smith 18044a2ae208SSatish Balay #undef __FUNCT__ 18056f0a72daSMatthew G. Knepley #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 18066f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 18076f0a72daSMatthew G. Knepley { 18086f0a72daSMatthew G. Knepley Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 18096f0a72daSMatthew G. Knepley PetscErrorCode ierr; 18105ba17502SJed Brown PetscMPIInt n = A->rmap->n; 1811fbb64d0eSMatthew G. Knepley PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 18126f0a72daSMatthew G. Knepley PetscInt *lrows,*owners = A->rmap->range; 18136f0a72daSMatthew G. Knepley PetscSFNode *rrows; 18146f0a72daSMatthew G. Knepley PetscSF sf; 18156f0a72daSMatthew G. Knepley const PetscScalar *xx; 18166f0a72daSMatthew G. 
Knepley PetscScalar *bb,*mask; 18176f0a72daSMatthew G. Knepley Vec xmask,lmask; 18186f0a72daSMatthew G. Knepley Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 18196f0a72daSMatthew G. Knepley PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 18206f0a72daSMatthew G. Knepley PetscScalar *aa; 18216f0a72daSMatthew G. Knepley 18226f0a72daSMatthew G. Knepley PetscFunctionBegin; 18236f0a72daSMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 18246f0a72daSMatthew G. Knepley ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 18256f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 18266f0a72daSMatthew G. Knepley ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 18276f0a72daSMatthew G. Knepley for (r = 0; r < N; ++r) { 18286f0a72daSMatthew G. Knepley const PetscInt idx = rows[r]; 18295ba17502SJed Brown if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 18305ba17502SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 18315ba17502SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 18325ba17502SJed Brown } 18336f0a72daSMatthew G. Knepley rrows[r].rank = p; 18346f0a72daSMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 18356f0a72daSMatthew G. Knepley } 18366f0a72daSMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 18376f0a72daSMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 18386f0a72daSMatthew G. Knepley /* Collect flags for rows to be zeroed */ 18396f0a72daSMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 18406f0a72daSMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 18416f0a72daSMatthew G. 
Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 18426f0a72daSMatthew G. Knepley /* Compress and put in row numbers */ 18436f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 18446f0a72daSMatthew G. Knepley /* zero diagonal part of matrix */ 18456f0a72daSMatthew G. Knepley ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 18466f0a72daSMatthew G. Knepley /* handle off diagonal part of matrix */ 18476f0a72daSMatthew G. Knepley ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr); 18486f0a72daSMatthew G. Knepley ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 18496f0a72daSMatthew G. Knepley ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 18506f0a72daSMatthew G. Knepley for (i=0; i<len; i++) bb[lrows[i]] = 1; 18516f0a72daSMatthew G. Knepley ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 18526f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18536f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18546f0a72daSMatthew G. Knepley ierr = VecDestroy(&xmask);CHKERRQ(ierr); 18556f0a72daSMatthew G. Knepley if (x) { 18566f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18576f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18586f0a72daSMatthew G. Knepley ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 18596f0a72daSMatthew G. Knepley ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 18606f0a72daSMatthew G. Knepley } 18616f0a72daSMatthew G. Knepley ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 18626f0a72daSMatthew G. Knepley /* remove zeroed rows of off diagonal matrix */ 18636f0a72daSMatthew G. Knepley for (i = 0; i < len; ++i) { 18646f0a72daSMatthew G. Knepley row = lrows[i]; 18656f0a72daSMatthew G. Knepley count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 18666f0a72daSMatthew G. 
Knepley aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 18676f0a72daSMatthew G. Knepley for (k = 0; k < count; ++k) { 18686f0a72daSMatthew G. Knepley aa[0] = 0.0; 18696f0a72daSMatthew G. Knepley aa += bs; 18706f0a72daSMatthew G. Knepley } 18716f0a72daSMatthew G. Knepley } 18726f0a72daSMatthew G. Knepley /* loop over all elements of off process part of matrix zeroing removed columns*/ 18736f0a72daSMatthew G. Knepley for (i = 0; i < l->B->rmap->N; ++i) { 18746f0a72daSMatthew G. Knepley row = i/bs; 18756f0a72daSMatthew G. Knepley for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 18766f0a72daSMatthew G. Knepley for (k = 0; k < bs; ++k) { 18776f0a72daSMatthew G. Knepley col = bs*baij->j[j] + k; 18786f0a72daSMatthew G. Knepley if (PetscAbsScalar(mask[col])) { 18796f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 18806f0a72daSMatthew G. Knepley if (b) bb[i] -= aa[0]*xx[col]; 18816f0a72daSMatthew G. Knepley aa[0] = 0.0; 18826f0a72daSMatthew G. Knepley } 18836f0a72daSMatthew G. Knepley } 18846f0a72daSMatthew G. Knepley } 18856f0a72daSMatthew G. Knepley } 18866f0a72daSMatthew G. Knepley if (x) { 18876f0a72daSMatthew G. Knepley ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 18886f0a72daSMatthew G. Knepley ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 18896f0a72daSMatthew G. Knepley } 18906f0a72daSMatthew G. Knepley ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 18916f0a72daSMatthew G. Knepley ierr = VecDestroy(&lmask);CHKERRQ(ierr); 18926f0a72daSMatthew G. 
Knepley ierr = PetscFree(lrows);CHKERRQ(ierr); 18934f9cfa9eSBarry Smith 18944f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 18954f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 18964f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 18974f9cfa9eSBarry Smith ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 18984f9cfa9eSBarry Smith } 18996f0a72daSMatthew G. Knepley PetscFunctionReturn(0); 19006f0a72daSMatthew G. Knepley } 19016f0a72daSMatthew G. Knepley 19026f0a72daSMatthew G. Knepley #undef __FUNCT__ 19034a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1904dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1905bb5a7306SBarry Smith { 1906bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1907dfbe8321SBarry Smith PetscErrorCode ierr; 1908d64ed03dSBarry Smith 1909d64ed03dSBarry Smith PetscFunctionBegin; 1910bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 19113a40ed3dSBarry Smith PetscFunctionReturn(0); 1912bb5a7306SBarry Smith } 1913bb5a7306SBarry Smith 19146849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 19150ac07820SSatish Balay 19164a2ae208SSatish Balay #undef __FUNCT__ 19174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1918ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 19197fc3c18eSBarry Smith { 19207fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 19217fc3c18eSBarry Smith Mat a,b,c,d; 1922ace3abfcSBarry Smith PetscBool flg; 1923dfbe8321SBarry Smith PetscErrorCode ierr; 19247fc3c18eSBarry Smith 19257fc3c18eSBarry Smith PetscFunctionBegin; 19267fc3c18eSBarry Smith a = matA->A; b = matA->B; 19277fc3c18eSBarry Smith c = matB->A; d = matB->B; 19287fc3c18eSBarry Smith 19297fc3c18eSBarry Smith ierr = 
MatEqual(a,c,&flg);CHKERRQ(ierr); 1930abc0a331SBarry Smith if (flg) { 19317fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 19327fc3c18eSBarry Smith } 1933ce94432eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 19347fc3c18eSBarry Smith PetscFunctionReturn(0); 19357fc3c18eSBarry Smith } 19367fc3c18eSBarry Smith 19373c896bc6SHong Zhang #undef __FUNCT__ 19383c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ" 19393c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 19403c896bc6SHong Zhang { 19413c896bc6SHong Zhang PetscErrorCode ierr; 19423c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19433c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 19443c896bc6SHong Zhang 19453c896bc6SHong Zhang PetscFunctionBegin; 19463c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 19473c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 19483c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 19493c896bc6SHong Zhang } else { 19503c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 19513c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 19523c896bc6SHong Zhang } 19533c896bc6SHong Zhang PetscFunctionReturn(0); 19543c896bc6SHong Zhang } 1955273d9f13SBarry Smith 19564a2ae208SSatish Balay #undef __FUNCT__ 19574994cf47SJed Brown #define __FUNCT__ "MatSetUp_MPIBAIJ" 19584994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1959273d9f13SBarry Smith { 1960dfbe8321SBarry Smith PetscErrorCode ierr; 1961273d9f13SBarry Smith 1962273d9f13SBarry Smith PetscFunctionBegin; 1963535b19f3SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1964273d9f13SBarry Smith PetscFunctionReturn(0); 1965273d9f13SBarry Smith } 1966273d9f13SBarry Smith 19674fe895cdSHong Zhang #undef 
__FUNCT__ 19684de5dceeSHong Zhang #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 19694de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 19704de5dceeSHong Zhang { 1971001ddc4fSHong Zhang PetscErrorCode ierr; 1972001ddc4fSHong Zhang PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 19734de5dceeSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 19744de5dceeSHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 19754de5dceeSHong Zhang 19764de5dceeSHong Zhang PetscFunctionBegin; 1977001ddc4fSHong Zhang ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 19784de5dceeSHong Zhang PetscFunctionReturn(0); 19794de5dceeSHong Zhang } 19804de5dceeSHong Zhang 19814de5dceeSHong Zhang #undef __FUNCT__ 19824fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ" 19834fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 19844fe895cdSHong Zhang { 19854fe895cdSHong Zhang PetscErrorCode ierr; 19864fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 19874fe895cdSHong Zhang PetscBLASInt bnz,one=1; 19884fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 19894fe895cdSHong Zhang 19904fe895cdSHong Zhang PetscFunctionBegin; 19914fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 19924fe895cdSHong Zhang PetscScalar alpha = a; 19934fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->A->data; 19944fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->A->data; 1995c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 19968b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 19974fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->B->data; 19984fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->B->data; 1999c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 20008b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2001a3fa217bSJose E. 
Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 20024fe895cdSHong Zhang } else { 20034de5dceeSHong Zhang Mat B; 20044de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 20054de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 20064de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 20074de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 20084de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 20094de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 20104de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 20114de5dceeSHong Zhang ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 20124de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 20134de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 20144de5dceeSHong Zhang ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 20154de5dceeSHong Zhang /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 20164de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 20174de5dceeSHong Zhang ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 20184de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 20194de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 20204fe895cdSHong Zhang } 20214fe895cdSHong Zhang PetscFunctionReturn(0); 20224fe895cdSHong Zhang } 20234fe895cdSHong Zhang 202499cafbc1SBarry Smith #undef __FUNCT__ 202599cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ" 202699cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 202799cafbc1SBarry Smith { 202899cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 202999cafbc1SBarry Smith PetscErrorCode ierr; 203099cafbc1SBarry Smith 203199cafbc1SBarry Smith PetscFunctionBegin; 203299cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 203399cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 203499cafbc1SBarry Smith PetscFunctionReturn(0); 203599cafbc1SBarry Smith } 203699cafbc1SBarry Smith 203799cafbc1SBarry Smith #undef __FUNCT__ 203899cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 203999cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 204099cafbc1SBarry Smith { 204199cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 204299cafbc1SBarry Smith PetscErrorCode ierr; 204399cafbc1SBarry Smith 204499cafbc1SBarry Smith PetscFunctionBegin; 204599cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 204699cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 204799cafbc1SBarry Smith PetscFunctionReturn(0); 204899cafbc1SBarry Smith } 204999cafbc1SBarry Smith 205082094794SBarry Smith #undef __FUNCT__ 205182094794SBarry Smith #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 20524aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 20534aa3045dSJed Brown { 20544aa3045dSJed Brown PetscErrorCode ierr; 20554aa3045dSJed Brown IS iscol_local; 
20564aa3045dSJed Brown PetscInt csize; 20574aa3045dSJed Brown 20584aa3045dSJed Brown PetscFunctionBegin; 20594aa3045dSJed Brown ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2060b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 2061b79d0421SJed Brown ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2062e32f2f54SBarry Smith if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2063b79d0421SJed Brown } else { 20644aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2065b79d0421SJed Brown } 20664aa3045dSJed Brown ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2067b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 2068b79d0421SJed Brown ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 20696bf464f9SBarry Smith ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2070b79d0421SJed Brown } 20714aa3045dSJed Brown PetscFunctionReturn(0); 20724aa3045dSJed Brown } 207329dcf524SDmitry Karpeev extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 20744aa3045dSJed Brown #undef __FUNCT__ 2075dd183c9eSJed Brown #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 207682094794SBarry Smith /* 207782094794SBarry Smith Not great since it makes two copies of the submatrix, first an SeqBAIJ 207882094794SBarry Smith in local and then by concatenating the local matrices the end result. 
207982094794SBarry Smith Writing it directly would be much like MatGetSubMatrices_MPIBAIJ() 208082094794SBarry Smith */ 20814aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 208282094794SBarry Smith { 208382094794SBarry Smith PetscErrorCode ierr; 208482094794SBarry Smith PetscMPIInt rank,size; 208582094794SBarry Smith PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 208629dcf524SDmitry Karpeev PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 208729dcf524SDmitry Karpeev Mat M,Mreuse; 208882094794SBarry Smith MatScalar *vwork,*aa; 2089ce94432eSBarry Smith MPI_Comm comm; 209029dcf524SDmitry Karpeev IS isrow_new, iscol_new; 209129dcf524SDmitry Karpeev PetscBool idflag,allrows, allcols; 209282094794SBarry Smith Mat_SeqBAIJ *aij; 209382094794SBarry Smith 209482094794SBarry Smith PetscFunctionBegin; 2095ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 209682094794SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 209782094794SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 209829dcf524SDmitry Karpeev /* The compression and expansion should be avoided. 
Doesn't point 209929dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 210029dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 210129dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 210282094794SBarry Smith 210329dcf524SDmitry Karpeev /* Check for special case: each processor gets entire matrix columns */ 210429dcf524SDmitry Karpeev ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 210529dcf524SDmitry Karpeev ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 210626fbe8dcSKarl Rupp if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 210726fbe8dcSKarl Rupp else allcols = PETSC_FALSE; 210829dcf524SDmitry Karpeev 210929dcf524SDmitry Karpeev ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 211029dcf524SDmitry Karpeev ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 211126fbe8dcSKarl Rupp if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 211226fbe8dcSKarl Rupp else allrows = PETSC_FALSE; 211326fbe8dcSKarl Rupp 211482094794SBarry Smith if (call == MAT_REUSE_MATRIX) { 211582094794SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2116e32f2f54SBarry Smith if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 211775f6568bSJed Brown ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 211882094794SBarry Smith } else { 211975f6568bSJed Brown ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 212082094794SBarry Smith } 212129dcf524SDmitry Karpeev ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 212229dcf524SDmitry Karpeev ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 212382094794SBarry Smith /* 212482094794SBarry Smith m - 
number of local rows 212582094794SBarry Smith n - number of columns (same on all processors) 212682094794SBarry Smith rstart - first row in new global matrix generated 212782094794SBarry Smith */ 212882094794SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 212982094794SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 213082094794SBarry Smith m = m/bs; 213182094794SBarry Smith n = n/bs; 213282094794SBarry Smith 213382094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 213482094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 213582094794SBarry Smith ii = aij->i; 213682094794SBarry Smith jj = aij->j; 213782094794SBarry Smith 213882094794SBarry Smith /* 213982094794SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 214082094794SBarry Smith portions of the matrix in order to do correct preallocation 214182094794SBarry Smith */ 214282094794SBarry Smith 214382094794SBarry Smith /* first get start and end of "diagonal" columns */ 214482094794SBarry Smith if (csize == PETSC_DECIDE) { 214582094794SBarry Smith ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 214682094794SBarry Smith if (mglobal == n*bs) { /* square matrix */ 214782094794SBarry Smith nlocal = m; 214882094794SBarry Smith } else { 214982094794SBarry Smith nlocal = n/size + ((n % size) > rank); 215082094794SBarry Smith } 215182094794SBarry Smith } else { 215282094794SBarry Smith nlocal = csize/bs; 215382094794SBarry Smith } 215482094794SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 215582094794SBarry Smith rstart = rend - nlocal; 215665e19b50SBarry Smith if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 215782094794SBarry Smith 215882094794SBarry Smith /* next, compute all the lengths */ 2159dcca6d9dSJed Brown ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 216082094794SBarry Smith for (i=0; i<m; i++) { 
216182094794SBarry Smith jend = ii[i+1] - ii[i]; 216282094794SBarry Smith olen = 0; 216382094794SBarry Smith dlen = 0; 216482094794SBarry Smith for (j=0; j<jend; j++) { 216582094794SBarry Smith if (*jj < rstart || *jj >= rend) olen++; 216682094794SBarry Smith else dlen++; 216782094794SBarry Smith jj++; 216882094794SBarry Smith } 216982094794SBarry Smith olens[i] = olen; 217082094794SBarry Smith dlens[i] = dlen; 217182094794SBarry Smith } 217282094794SBarry Smith ierr = MatCreate(comm,&M);CHKERRQ(ierr); 217382094794SBarry Smith ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 217482094794SBarry Smith ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 217582094794SBarry Smith ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2176eb9baa12SBarry Smith ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 217782094794SBarry Smith } else { 217882094794SBarry Smith PetscInt ml,nl; 217982094794SBarry Smith 218082094794SBarry Smith M = *newmat; 218182094794SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2182e32f2f54SBarry Smith if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 218382094794SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 218482094794SBarry Smith /* 218582094794SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 218682094794SBarry Smith rather than the slower MatSetValues(). 
218782094794SBarry Smith */ 218882094794SBarry Smith M->was_assembled = PETSC_TRUE; 218982094794SBarry Smith M->assembled = PETSC_FALSE; 219082094794SBarry Smith } 219182094794SBarry Smith ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 219282094794SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 219382094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 219482094794SBarry Smith ii = aij->i; 219582094794SBarry Smith jj = aij->j; 219682094794SBarry Smith aa = aij->a; 219782094794SBarry Smith for (i=0; i<m; i++) { 219882094794SBarry Smith row = rstart/bs + i; 219982094794SBarry Smith nz = ii[i+1] - ii[i]; 220082094794SBarry Smith cwork = jj; jj += nz; 220175f6568bSJed Brown vwork = aa; aa += nz*bs*bs; 220282094794SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 220382094794SBarry Smith } 220482094794SBarry Smith 220582094794SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 220682094794SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 220782094794SBarry Smith *newmat = M; 220882094794SBarry Smith 220982094794SBarry Smith /* save submatrix used in processor for next request */ 221082094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 221182094794SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 221282094794SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 221382094794SBarry Smith } 221482094794SBarry Smith PetscFunctionReturn(0); 221582094794SBarry Smith } 221682094794SBarry Smith 221782094794SBarry Smith #undef __FUNCT__ 221882094794SBarry Smith #define __FUNCT__ "MatPermute_MPIBAIJ" 221982094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 222082094794SBarry Smith { 222182094794SBarry Smith MPI_Comm comm,pcomm; 2222a0a83eb5SRémi Lacroix PetscInt clocal_size,nrows; 222382094794SBarry Smith const PetscInt *rows; 
2224dbf0e21dSBarry Smith PetscMPIInt size; 2225a0a83eb5SRémi Lacroix IS crowp,lcolp; 222682094794SBarry Smith PetscErrorCode ierr; 222782094794SBarry Smith 222882094794SBarry Smith PetscFunctionBegin; 222982094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 223082094794SBarry Smith /* make a collective version of 'rowp' */ 223182094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 223282094794SBarry Smith if (pcomm==comm) { 223382094794SBarry Smith crowp = rowp; 223482094794SBarry Smith } else { 223582094794SBarry Smith ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 223682094794SBarry Smith ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 223770b3c8c7SBarry Smith ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 223882094794SBarry Smith ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 223982094794SBarry Smith } 2240a0a83eb5SRémi Lacroix ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2241a0a83eb5SRémi Lacroix /* make a local version of 'colp' */ 224282094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2243dbf0e21dSBarry Smith ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2244dbf0e21dSBarry Smith if (size==1) { 224582094794SBarry Smith lcolp = colp; 224682094794SBarry Smith } else { 224775f6568bSJed Brown ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 224882094794SBarry Smith } 2249dbf0e21dSBarry Smith ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 225075f6568bSJed Brown /* now we just get the submatrix */ 22517afc1a8bSJed Brown ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2252a0a83eb5SRémi Lacroix ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2253a0a83eb5SRémi Lacroix /* clean up */ 2254a0a83eb5SRémi Lacroix if (pcomm!=comm) { 2255a0a83eb5SRémi Lacroix ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2256a0a83eb5SRémi Lacroix } 2257dbf0e21dSBarry Smith if (size>1) { 
22586bf464f9SBarry Smith ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 225982094794SBarry Smith } 226082094794SBarry Smith PetscFunctionReturn(0); 226182094794SBarry Smith } 226282094794SBarry Smith 22638c7482ecSBarry Smith #undef __FUNCT__ 22648c7482ecSBarry Smith #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 22657087cfbeSBarry Smith PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 22668c7482ecSBarry Smith { 22678c7482ecSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 22688c7482ecSBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 22698c7482ecSBarry Smith 22708c7482ecSBarry Smith PetscFunctionBegin; 227126fbe8dcSKarl Rupp if (nghosts) *nghosts = B->nbs; 227226fbe8dcSKarl Rupp if (ghosts) *ghosts = baij->garray; 22738c7482ecSBarry Smith PetscFunctionReturn(0); 22748c7482ecSBarry Smith } 22758c7482ecSBarry Smith 2276f6d58c54SBarry Smith #undef __FUNCT__ 2277d1adec66SJed Brown #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2278d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2279f6d58c54SBarry Smith { 2280f6d58c54SBarry Smith Mat B; 2281f6d58c54SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2282f6d58c54SBarry Smith Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2283f6d58c54SBarry Smith Mat_SeqAIJ *b; 2284f6d58c54SBarry Smith PetscErrorCode ierr; 2285f6d58c54SBarry Smith PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2286f6d58c54SBarry Smith PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2287f6d58c54SBarry Smith PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2288f6d58c54SBarry Smith 2289f6d58c54SBarry Smith PetscFunctionBegin; 2290ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2291ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2292f6d58c54SBarry Smith 2293f6d58c54SBarry Smith /* 
---------------------------------------------------------------- 2294f6d58c54SBarry Smith Tell every processor the number of nonzeros per row 2295f6d58c54SBarry Smith */ 2296785e854fSJed Brown ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr); 2297f6d58c54SBarry Smith for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2298f6d58c54SBarry Smith lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2299f6d58c54SBarry Smith } 2300f6d58c54SBarry Smith sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2301785e854fSJed Brown ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2302f6d58c54SBarry Smith displs = recvcounts + size; 2303f6d58c54SBarry Smith for (i=0; i<size; i++) { 2304f6d58c54SBarry Smith recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2305f6d58c54SBarry Smith displs[i] = A->rmap->range[i]/bs; 2306f6d58c54SBarry Smith } 2307f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2308ce94432eSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2309f6d58c54SBarry Smith #else 2310ce94432eSBarry Smith ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2311f6d58c54SBarry Smith #endif 2312f6d58c54SBarry Smith /* --------------------------------------------------------------- 2313f6d58c54SBarry Smith Create the sequential matrix of the same type as the local block diagonal 2314f6d58c54SBarry Smith */ 2315f6d58c54SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2316f6d58c54SBarry Smith ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2317f6d58c54SBarry Smith ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2318f6d58c54SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2319f6d58c54SBarry Smith b 
= (Mat_SeqAIJ*)B->data; 2320f6d58c54SBarry Smith 2321f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2322f6d58c54SBarry Smith Copy my part of matrix column indices over 2323f6d58c54SBarry Smith */ 2324f6d58c54SBarry Smith sendcount = ad->nz + bd->nz; 2325f6d58c54SBarry Smith jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2326f6d58c54SBarry Smith a_jsendbuf = ad->j; 2327f6d58c54SBarry Smith b_jsendbuf = bd->j; 2328f6d58c54SBarry Smith n = A->rmap->rend/bs - A->rmap->rstart/bs; 2329f6d58c54SBarry Smith cnt = 0; 2330f6d58c54SBarry Smith for (i=0; i<n; i++) { 2331f6d58c54SBarry Smith 2332f6d58c54SBarry Smith /* put in lower diagonal portion */ 2333f6d58c54SBarry Smith m = bd->i[i+1] - bd->i[i]; 2334f6d58c54SBarry Smith while (m > 0) { 2335f6d58c54SBarry Smith /* is it above diagonal (in bd (compressed) numbering) */ 2336f6d58c54SBarry Smith if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2337f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2338f6d58c54SBarry Smith m--; 2339f6d58c54SBarry Smith } 2340f6d58c54SBarry Smith 2341f6d58c54SBarry Smith /* put in diagonal portion */ 2342f6d58c54SBarry Smith for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2343f6d58c54SBarry Smith jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2344f6d58c54SBarry Smith } 2345f6d58c54SBarry Smith 2346f6d58c54SBarry Smith /* put in upper diagonal portion */ 2347f6d58c54SBarry Smith while (m-- > 0) { 2348f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2349f6d58c54SBarry Smith } 2350f6d58c54SBarry Smith } 2351e32f2f54SBarry Smith if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2352f6d58c54SBarry Smith 2353f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2354f6d58c54SBarry Smith Gather all column indices to all processors 2355f6d58c54SBarry Smith */ 2356f6d58c54SBarry Smith for (i=0; i<size; i++) { 
2357f6d58c54SBarry Smith recvcounts[i] = 0; 2358f6d58c54SBarry Smith for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2359f6d58c54SBarry Smith recvcounts[i] += lens[j]; 2360f6d58c54SBarry Smith } 2361f6d58c54SBarry Smith } 2362f6d58c54SBarry Smith displs[0] = 0; 2363f6d58c54SBarry Smith for (i=1; i<size; i++) { 2364f6d58c54SBarry Smith displs[i] = displs[i-1] + recvcounts[i-1]; 2365f6d58c54SBarry Smith } 2366f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2367ce94432eSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2368f6d58c54SBarry Smith #else 2369ce94432eSBarry Smith ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2370f6d58c54SBarry Smith #endif 2371f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2372f6d58c54SBarry Smith Assemble the matrix into useable form (note numerical values not yet set) 2373f6d58c54SBarry Smith */ 2374f6d58c54SBarry Smith /* set the b->ilen (length of each row) values */ 2375f6d58c54SBarry Smith ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2376f6d58c54SBarry Smith /* set the b->i indices */ 2377f6d58c54SBarry Smith b->i[0] = 0; 2378f6d58c54SBarry Smith for (i=1; i<=A->rmap->N/bs; i++) { 2379f6d58c54SBarry Smith b->i[i] = b->i[i-1] + lens[i-1]; 2380f6d58c54SBarry Smith } 2381f6d58c54SBarry Smith ierr = PetscFree(lens);CHKERRQ(ierr); 2382f6d58c54SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2383f6d58c54SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2384f6d58c54SBarry Smith ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2385f6d58c54SBarry Smith 2386f6d58c54SBarry Smith if (A->symmetric) { 2387f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2388f6d58c54SBarry Smith } else if 
(A->hermitian) { 2389f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2390f6d58c54SBarry Smith } else if (A->structurally_symmetric) { 2391f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2392f6d58c54SBarry Smith } 2393f6d58c54SBarry Smith *newmat = B; 2394f6d58c54SBarry Smith PetscFunctionReturn(0); 2395f6d58c54SBarry Smith } 2396f6d58c54SBarry Smith 2397b1a666ecSBarry Smith #undef __FUNCT__ 2398b1a666ecSBarry Smith #define __FUNCT__ "MatSOR_MPIBAIJ" 2399b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2400b1a666ecSBarry Smith { 2401b1a666ecSBarry Smith Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2402b1a666ecSBarry Smith PetscErrorCode ierr; 2403b1a666ecSBarry Smith Vec bb1 = 0; 2404b1a666ecSBarry Smith 2405b1a666ecSBarry Smith PetscFunctionBegin; 2406b1a666ecSBarry Smith if (flag == SOR_APPLY_UPPER) { 2407b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2408b1a666ecSBarry Smith PetscFunctionReturn(0); 2409b1a666ecSBarry Smith } 2410b1a666ecSBarry Smith 24114e980039SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 24124e980039SJed Brown ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 24134e980039SJed Brown } 24144e980039SJed Brown 2415b1a666ecSBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2416b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2417b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2418b1a666ecSBarry Smith its--; 2419b1a666ecSBarry Smith } 2420b1a666ecSBarry Smith 2421b1a666ecSBarry Smith while (its--) { 2422b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2423b1a666ecSBarry Smith ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2424b1a666ecSBarry Smith 2425b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2426b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2427b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2428b1a666ecSBarry Smith 2429b1a666ecSBarry Smith /* local sweep */ 2430b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2431b1a666ecSBarry Smith } 2432b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2433b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2434b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2435b1a666ecSBarry Smith its--; 2436b1a666ecSBarry Smith } 2437b1a666ecSBarry Smith while (its--) { 2438b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2439b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2440b1a666ecSBarry Smith 2441b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2442b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2443b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2444b1a666ecSBarry Smith 2445b1a666ecSBarry Smith /* local sweep */ 2446b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2447b1a666ecSBarry Smith } 2448b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2449b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2450b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2451b1a666ecSBarry Smith its--; 2452b1a666ecSBarry Smith } 2453b1a666ecSBarry Smith while (its--) { 2454b1a666ecSBarry Smith ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2455b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2456b1a666ecSBarry Smith 2457b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2458b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2459b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2460b1a666ecSBarry Smith 2461b1a666ecSBarry Smith /* local sweep */ 2462b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2463b1a666ecSBarry Smith } 2464ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2465b1a666ecSBarry Smith 24666bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2467b1a666ecSBarry Smith PetscFunctionReturn(0); 2468b1a666ecSBarry Smith } 2469b1a666ecSBarry Smith 2470bbead8a2SBarry Smith #undef __FUNCT__ 247147f7623dSRémi Lacroix #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 247247f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 247347f7623dSRémi Lacroix { 247447f7623dSRémi Lacroix PetscErrorCode ierr; 247547f7623dSRémi Lacroix Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 247647f7623dSRémi Lacroix PetscInt N,i,*garray = aij->garray; 247747f7623dSRémi Lacroix PetscInt ib,jb,bs = A->rmap->bs; 247847f7623dSRémi Lacroix Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 247947f7623dSRémi Lacroix MatScalar *a_val = a_aij->a; 248047f7623dSRémi Lacroix Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 248147f7623dSRémi Lacroix MatScalar *b_val = b_aij->a; 248247f7623dSRémi Lacroix PetscReal *work; 248347f7623dSRémi Lacroix 248447f7623dSRémi Lacroix PetscFunctionBegin; 248547f7623dSRémi Lacroix ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 24861795a4d1SJed Brown ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 
248747f7623dSRémi Lacroix if (type == NORM_2) { 248847f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 248947f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 249047f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 249147f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 249247f7623dSRémi Lacroix a_val++; 249347f7623dSRémi Lacroix } 249447f7623dSRémi Lacroix } 249547f7623dSRémi Lacroix } 249647f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 249747f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 249847f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 249947f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 250047f7623dSRémi Lacroix b_val++; 250147f7623dSRémi Lacroix } 250247f7623dSRémi Lacroix } 250347f7623dSRémi Lacroix } 250447f7623dSRémi Lacroix } else if (type == NORM_1) { 250547f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 250647f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 250747f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 250847f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 250947f7623dSRémi Lacroix a_val++; 251047f7623dSRémi Lacroix } 251147f7623dSRémi Lacroix } 251247f7623dSRémi Lacroix } 251347f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 251447f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 251547f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 251647f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 251747f7623dSRémi Lacroix b_val++; 251847f7623dSRémi Lacroix } 251947f7623dSRémi Lacroix } 252047f7623dSRémi Lacroix } 252147f7623dSRémi Lacroix } else if (type == NORM_INFINITY) { 252247f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 252347f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 252447f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 252547f7623dSRémi 
Lacroix int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 252647f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 252747f7623dSRémi Lacroix a_val++; 252847f7623dSRémi Lacroix } 252947f7623dSRémi Lacroix } 253047f7623dSRémi Lacroix } 253147f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 253247f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 253347f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 253447f7623dSRémi Lacroix int col = garray[b_aij->j[i]] * bs + jb; 253547f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 253647f7623dSRémi Lacroix b_val++; 253747f7623dSRémi Lacroix } 253847f7623dSRémi Lacroix } 253947f7623dSRémi Lacroix } 254047f7623dSRémi Lacroix } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 254147f7623dSRémi Lacroix if (type == NORM_INFINITY) { 254247f7623dSRémi Lacroix ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 254347f7623dSRémi Lacroix } else { 254447f7623dSRémi Lacroix ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 254547f7623dSRémi Lacroix } 254647f7623dSRémi Lacroix ierr = PetscFree(work);CHKERRQ(ierr); 254747f7623dSRémi Lacroix if (type == NORM_2) { 254847f7623dSRémi Lacroix for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 254947f7623dSRémi Lacroix } 255047f7623dSRémi Lacroix PetscFunctionReturn(0); 255147f7623dSRémi Lacroix } 255247f7623dSRémi Lacroix 255347f7623dSRémi Lacroix #undef __FUNCT__ 2554bbead8a2SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2555713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2556bbead8a2SBarry Smith { 2557bbead8a2SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2558bbead8a2SBarry Smith PetscErrorCode ierr; 2559bbead8a2SBarry Smith 2560bbead8a2SBarry Smith PetscFunctionBegin; 2561bbead8a2SBarry Smith ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2562bbead8a2SBarry Smith PetscFunctionReturn(0); 2563bbead8a2SBarry Smith } 2564bbead8a2SBarry Smith 25658c7482ecSBarry Smith 256679bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 25673964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2568cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 2569cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 2570cc2dc46cSBarry Smith MatMult_MPIBAIJ, 257197304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 25727c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 25737c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 2574cc2dc46cSBarry Smith 0, 2575cc2dc46cSBarry Smith 0, 2576cc2dc46cSBarry Smith 0, 257797304618SKris Buschelman /*10*/ 0, 2578cc2dc46cSBarry Smith 0, 2579cc2dc46cSBarry Smith 0, 2580b1a666ecSBarry Smith MatSOR_MPIBAIJ, 2581cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 258297304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 25837fc3c18eSBarry Smith MatEqual_MPIBAIJ, 2584cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 2585cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 2586cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 258797304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 2588cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2589cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2590cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 2591d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIBAIJ, 2592cc2dc46cSBarry Smith 0, 2593cc2dc46cSBarry Smith 0, 2594cc2dc46cSBarry Smith 0, 2595cc2dc46cSBarry Smith 0, 25964994cf47SJed Brown /*29*/ MatSetUp_MPIBAIJ, 2597273d9f13SBarry Smith 0, 2598cc2dc46cSBarry Smith 0, 2599cc2dc46cSBarry Smith 0, 2600cc2dc46cSBarry Smith 0, 2601d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIBAIJ, 2602cc2dc46cSBarry Smith 0, 2603cc2dc46cSBarry Smith 0, 2604cc2dc46cSBarry Smith 0, 2605cc2dc46cSBarry Smith 0, 2606d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIBAIJ, 2607cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 2608cc2dc46cSBarry Smith 
MatIncreaseOverlap_MPIBAIJ, 2609cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 26103c896bc6SHong Zhang MatCopy_MPIBAIJ, 2611d519adbfSMatthew Knepley /*44*/ 0, 2612cc2dc46cSBarry Smith MatScale_MPIBAIJ, 2613cc2dc46cSBarry Smith 0, 2614cc2dc46cSBarry Smith 0, 26156f0a72daSMatthew G. Knepley MatZeroRowsColumns_MPIBAIJ, 2616f73d5cc4SBarry Smith /*49*/ 0, 2617cc2dc46cSBarry Smith 0, 2618cc2dc46cSBarry Smith 0, 2619cc2dc46cSBarry Smith 0, 2620cc2dc46cSBarry Smith 0, 262193dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2622cc2dc46cSBarry Smith 0, 2623cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 262482094794SBarry Smith MatPermute_MPIBAIJ, 2625cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 2626d519adbfSMatthew Knepley /*59*/ MatGetSubMatrix_MPIBAIJ, 2627f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2628f14a1c24SBarry Smith MatView_MPIBAIJ, 2629357abbc8SBarry Smith 0, 26307843d17aSBarry Smith 0, 2631d519adbfSMatthew Knepley /*64*/ 0, 26327843d17aSBarry Smith 0, 26337843d17aSBarry Smith 0, 26347843d17aSBarry Smith 0, 26357843d17aSBarry Smith 0, 2636d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIBAIJ, 26377843d17aSBarry Smith 0, 263897304618SKris Buschelman 0, 263997304618SKris Buschelman 0, 264097304618SKris Buschelman 0, 2641d519adbfSMatthew Knepley /*74*/ 0, 2642f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 264397304618SKris Buschelman 0, 264497304618SKris Buschelman 0, 264597304618SKris Buschelman 0, 2646d519adbfSMatthew Knepley /*79*/ 0, 264797304618SKris Buschelman 0, 264897304618SKris Buschelman 0, 264997304618SKris Buschelman 0, 26505bba2384SShri Abhyankar MatLoad_MPIBAIJ, 2651d519adbfSMatthew Knepley /*84*/ 0, 2652865e5f61SKris Buschelman 0, 2653865e5f61SKris Buschelman 0, 2654865e5f61SKris Buschelman 0, 2655865e5f61SKris Buschelman 0, 2656d519adbfSMatthew Knepley /*89*/ 0, 2657865e5f61SKris Buschelman 0, 2658865e5f61SKris Buschelman 0, 2659865e5f61SKris Buschelman 0, 2660865e5f61SKris Buschelman 0, 2661d519adbfSMatthew Knepley /*94*/ 0, 2662865e5f61SKris 
Buschelman 0, 2663865e5f61SKris Buschelman 0, 266499cafbc1SBarry Smith 0, 266599cafbc1SBarry Smith 0, 2666d519adbfSMatthew Knepley /*99*/ 0, 266799cafbc1SBarry Smith 0, 266899cafbc1SBarry Smith 0, 266999cafbc1SBarry Smith 0, 267099cafbc1SBarry Smith 0, 2671d519adbfSMatthew Knepley /*104*/0, 267299cafbc1SBarry Smith MatRealPart_MPIBAIJ, 26738c7482ecSBarry Smith MatImaginaryPart_MPIBAIJ, 26748c7482ecSBarry Smith 0, 26758c7482ecSBarry Smith 0, 2676d519adbfSMatthew Knepley /*109*/0, 26778c7482ecSBarry Smith 0, 26788c7482ecSBarry Smith 0, 26798c7482ecSBarry Smith 0, 26808c7482ecSBarry Smith 0, 2681d1adec66SJed Brown /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 26828c7482ecSBarry Smith 0, 26834683f7a4SShri Abhyankar MatGetGhosts_MPIBAIJ, 26844683f7a4SShri Abhyankar 0, 26854683f7a4SShri Abhyankar 0, 26864683f7a4SShri Abhyankar /*119*/0, 26874683f7a4SShri Abhyankar 0, 26884683f7a4SShri Abhyankar 0, 2689bbead8a2SBarry Smith 0, 2690e8271787SHong Zhang MatGetMultiProcBlock_MPIBAIJ, 2691bbead8a2SBarry Smith /*124*/0, 269247f7623dSRémi Lacroix MatGetColumnNorms_MPIBAIJ, 26933964eb88SJed Brown MatInvertBlockDiagonal_MPIBAIJ, 26943964eb88SJed Brown 0, 26953964eb88SJed Brown 0, 26963964eb88SJed Brown /*129*/ 0, 26973964eb88SJed Brown 0, 26983964eb88SJed Brown 0, 26993964eb88SJed Brown 0, 27003964eb88SJed Brown 0, 27013964eb88SJed Brown /*134*/ 0, 27023964eb88SJed Brown 0, 27033964eb88SJed Brown 0, 27043964eb88SJed Brown 0, 27053964eb88SJed Brown 0, 27063964eb88SJed Brown /*139*/ 0, 2707f9426fe0SMark Adams 0, 27081919a2e2SJed Brown 0, 2709f86b9fbaSHong Zhang MatFDColoringSetUp_MPIXAIJ 27108c7482ecSBarry Smith }; 271179bdfe76SSatish Balay 27124a2ae208SSatish Balay #undef __FUNCT__ 27134a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 271411bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 27155ef9f2a5SBarry Smith { 27165ef9f2a5SBarry Smith PetscFunctionBegin; 27175ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ*)A->data)->A; 27185ef9f2a5SBarry 
Smith PetscFunctionReturn(0); 27195ef9f2a5SBarry Smith } 272079bdfe76SSatish Balay 27218cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2722d94109b8SHong Zhang 2723aac34f13SBarry Smith #undef __FUNCT__ 2724aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2725cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2726aac34f13SBarry Smith { 2727b8d659d7SLisandro Dalcin PetscInt m,rstart,cstart,cend; 2728b8d659d7SLisandro Dalcin PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2729b8d659d7SLisandro Dalcin const PetscInt *JJ =0; 2730b8d659d7SLisandro Dalcin PetscScalar *values=0; 2731d47bf9aaSJed Brown PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2732aac34f13SBarry Smith PetscErrorCode ierr; 2733aac34f13SBarry Smith 2734aac34f13SBarry Smith PetscFunctionBegin; 273526283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 273626283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 273726283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 273826283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2739e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2740d0f46423SBarry Smith m = B->rmap->n/bs; 2741d0f46423SBarry Smith rstart = B->rmap->rstart/bs; 2742d0f46423SBarry Smith cstart = B->cmap->rstart/bs; 2743d0f46423SBarry Smith cend = B->cmap->rend/bs; 2744b8d659d7SLisandro Dalcin 2745e32f2f54SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2746dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2747aac34f13SBarry Smith for (i=0; i<m; i++) { 2748cf12db73SBarry Smith nz = ii[i+1] - ii[i]; 2749e32f2f54SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a 
negative number of columns %D",i,nz); 2750b8d659d7SLisandro Dalcin nz_max = PetscMax(nz_max,nz); 2751cf12db73SBarry Smith JJ = jj + ii[i]; 2752b8d659d7SLisandro Dalcin for (j=0; j<nz; j++) { 2753aac34f13SBarry Smith if (*JJ >= cstart) break; 2754aac34f13SBarry Smith JJ++; 2755aac34f13SBarry Smith } 2756aac34f13SBarry Smith d = 0; 2757b8d659d7SLisandro Dalcin for (; j<nz; j++) { 2758aac34f13SBarry Smith if (*JJ++ >= cend) break; 2759aac34f13SBarry Smith d++; 2760aac34f13SBarry Smith } 2761aac34f13SBarry Smith d_nnz[i] = d; 2762b8d659d7SLisandro Dalcin o_nnz[i] = nz - d; 2763aac34f13SBarry Smith } 2764aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2765fca92195SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2766aac34f13SBarry Smith 2767b8d659d7SLisandro Dalcin values = (PetscScalar*)V; 2768b8d659d7SLisandro Dalcin if (!values) { 2769785e854fSJed Brown ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2770b8d659d7SLisandro Dalcin ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2771b8d659d7SLisandro Dalcin } 2772b8d659d7SLisandro Dalcin for (i=0; i<m; i++) { 2773b8d659d7SLisandro Dalcin PetscInt row = i + rstart; 2774cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2775cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 27763adadaf3SJed Brown if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2777cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2778b8d659d7SLisandro Dalcin ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 27793adadaf3SJed Brown } else { /* block ordering does not match so we can only insert one block at a time. */ 27803adadaf3SJed Brown PetscInt j; 27813adadaf3SJed Brown for (j=0; j<ncols; j++) { 27823adadaf3SJed Brown const PetscScalar *svals = values + (V ? 
(bs*bs*(ii[i]+j)) : 0); 27833adadaf3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 27843adadaf3SJed Brown } 27853adadaf3SJed Brown } 2786aac34f13SBarry Smith } 2787aac34f13SBarry Smith 2788b8d659d7SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2789aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2790aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27917827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2792aac34f13SBarry Smith PetscFunctionReturn(0); 2793aac34f13SBarry Smith } 2794aac34f13SBarry Smith 2795aac34f13SBarry Smith #undef __FUNCT__ 2796aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2797aac34f13SBarry Smith /*@C 2798dfb205c3SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2799aac34f13SBarry Smith (the default parallel PETSc format). 2800aac34f13SBarry Smith 2801aac34f13SBarry Smith Collective on MPI_Comm 2802aac34f13SBarry Smith 2803aac34f13SBarry Smith Input Parameters: 28041c4f3114SJed Brown + B - the matrix 2805dfb205c3SBarry Smith . bs - the block size 2806aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2807aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2808aac34f13SBarry Smith - v - optional values in the matrix 2809aac34f13SBarry Smith 2810aac34f13SBarry Smith Level: developer 2811aac34f13SBarry Smith 28123adadaf3SJed Brown Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 28133adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 28143adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 28153adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 28163adadaf3SJed Brown block column and the second index is over columns within a block. 28173adadaf3SJed Brown 2818aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2819aac34f13SBarry Smith 28203adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2821aac34f13SBarry Smith @*/ 28227087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2823aac34f13SBarry Smith { 28244ac538c5SBarry Smith PetscErrorCode ierr; 2825aac34f13SBarry Smith 2826aac34f13SBarry Smith PetscFunctionBegin; 28276ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 28286ba663aaSJed Brown PetscValidType(B,1); 28296ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 28304ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2831aac34f13SBarry Smith PetscFunctionReturn(0); 2832aac34f13SBarry Smith } 2833aac34f13SBarry Smith 28344a2ae208SSatish Balay #undef __FUNCT__ 2835a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2836b2573a8aSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2837a23d5eceSKris Buschelman { 2838a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2839dfbe8321SBarry Smith 
PetscErrorCode ierr; 2840535b19f3SBarry Smith PetscInt i; 2841a23d5eceSKris Buschelman 2842a23d5eceSKris Buschelman PetscFunctionBegin; 284333d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 284426283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 284526283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2846e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2847899cda47SBarry Smith 2848a23d5eceSKris Buschelman if (d_nnz) { 2849d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2850e32f2f54SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2851a23d5eceSKris Buschelman } 2852a23d5eceSKris Buschelman } 2853a23d5eceSKris Buschelman if (o_nnz) { 2854d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2855e32f2f54SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2856a23d5eceSKris Buschelman } 2857a23d5eceSKris Buschelman } 2858a23d5eceSKris Buschelman 2859a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2860a23d5eceSKris Buschelman b->bs2 = bs*bs; 2861d0f46423SBarry Smith b->mbs = B->rmap->n/bs; 2862d0f46423SBarry Smith b->nbs = B->cmap->n/bs; 2863d0f46423SBarry Smith b->Mbs = B->rmap->N/bs; 2864d0f46423SBarry Smith b->Nbs = B->cmap->N/bs; 2865a23d5eceSKris Buschelman 2866a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2867d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2868a23d5eceSKris Buschelman } 2869d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2870d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2871d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2872d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2873a23d5eceSKris Buschelman 2874526dfc15SBarry Smith if (!B->preallocated) { 2875f69a0ea3SMatthew Knepley ierr = 
MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2876d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 28779c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 28783bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2879f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2880d0f46423SBarry Smith ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 28819c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 28823bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2883ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2884526dfc15SBarry Smith } 2885a23d5eceSKris Buschelman 2886526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2887526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2888526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2889a23d5eceSKris Buschelman PetscFunctionReturn(0); 2890a23d5eceSKris Buschelman } 2891a23d5eceSKris Buschelman 28927087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 28937087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 28945bf65638SKris Buschelman 289582094794SBarry Smith #undef __FUNCT__ 289682094794SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 28978cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 289882094794SBarry Smith { 289982094794SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 290082094794SBarry Smith PetscErrorCode ierr; 290182094794SBarry Smith Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 290282094794SBarry Smith PetscInt M = 
B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 290382094794SBarry Smith const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 290482094794SBarry Smith 290582094794SBarry Smith PetscFunctionBegin; 2906785e854fSJed Brown ierr = PetscMalloc1((M+1),&ii);CHKERRQ(ierr); 290782094794SBarry Smith ii[0] = 0; 290882094794SBarry Smith for (i=0; i<M; i++) { 2909e32f2f54SBarry Smith if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2910e32f2f54SBarry Smith if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 291182094794SBarry Smith ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 29125ee9ba1cSJed Brown /* remove one from count of matrix has diagonal */ 29135ee9ba1cSJed Brown for (j=id[i]; j<id[i+1]; j++) { 29145ee9ba1cSJed Brown if (jd[j] == i) {ii[i+1]--;break;} 29155ee9ba1cSJed Brown } 291682094794SBarry Smith } 2917785e854fSJed Brown ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 291882094794SBarry Smith cnt = 0; 291982094794SBarry Smith for (i=0; i<M; i++) { 292082094794SBarry Smith for (j=io[i]; j<io[i+1]; j++) { 292182094794SBarry Smith if (garray[jo[j]] > rstart) break; 292282094794SBarry Smith jj[cnt++] = garray[jo[j]]; 292382094794SBarry Smith } 292482094794SBarry Smith for (k=id[i]; k<id[i+1]; k++) { 29255ee9ba1cSJed Brown if (jd[k] != i) { 292682094794SBarry Smith jj[cnt++] = rstart + jd[k]; 292782094794SBarry Smith } 29285ee9ba1cSJed Brown } 292982094794SBarry Smith for (; j<io[i+1]; j++) { 293082094794SBarry Smith jj[cnt++] = garray[jo[j]]; 293182094794SBarry Smith } 293282094794SBarry Smith } 2933ce94432eSBarry Smith ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 293482094794SBarry Smith PetscFunctionReturn(0); 293582094794SBarry Smith } 293682094794SBarry Smith 2937c6db04a5SJed Brown #include 
<../src/mat/impls/aij/mpi/mpiaij.h> 293862471d69SBarry Smith 29398cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2940b2573a8aSBarry Smith 294162471d69SBarry Smith #undef __FUNCT__ 294262471d69SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 29438cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 294462471d69SBarry Smith { 294562471d69SBarry Smith PetscErrorCode ierr; 294662471d69SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 294762471d69SBarry Smith Mat B; 294885a69837SSatish Balay Mat_MPIAIJ *b; 294962471d69SBarry Smith 295062471d69SBarry Smith PetscFunctionBegin; 2951ce94432eSBarry Smith if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 295262471d69SBarry Smith 2953ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 295462471d69SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 29556d0a4a0eSHong Zhang ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 29560298fd71SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 29570298fd71SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 295862471d69SBarry Smith b = (Mat_MPIAIJ*) B->data; 295962471d69SBarry Smith 29606bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 29616bf464f9SBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2962ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 296362471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 296462471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 296562471d69SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 296662471d69SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
29676a719282SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29686a719282SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 296962471d69SBarry Smith if (reuse == MAT_REUSE_MATRIX) { 297062471d69SBarry Smith ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 297162471d69SBarry Smith } else { 297262471d69SBarry Smith *newmat = B; 297362471d69SBarry Smith } 297462471d69SBarry Smith PetscFunctionReturn(0); 297562471d69SBarry Smith } 297662471d69SBarry Smith 2977450b117fSShri Abhyankar #if defined(PETSC_HAVE_MUMPS) 29788cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*); 2979450b117fSShri Abhyankar #endif 2980450b117fSShri Abhyankar 29810bad9183SKris Buschelman /*MC 2982fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 29830bad9183SKris Buschelman 29840bad9183SKris Buschelman Options Database Keys: 29858c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 29868c07d4e3SBarry Smith . 
-mat_block_size <bs> - set the blocksize used to store the matrix 29878c07d4e3SBarry Smith - -mat_use_hash_table <fact> 29880bad9183SKris Buschelman 29890bad9183SKris Buschelman Level: beginner 29900bad9183SKris Buschelman 29910bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 29920bad9183SKris Buschelman M*/ 29930bad9183SKris Buschelman 29948cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2995c0cdd4a1SDahai Guo 2996a23d5eceSKris Buschelman #undef __FUNCT__ 29974a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 29988cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2999273d9f13SBarry Smith { 3000273d9f13SBarry Smith Mat_MPIBAIJ *b; 3001dfbe8321SBarry Smith PetscErrorCode ierr; 3002ace3abfcSBarry Smith PetscBool flg; 3003273d9f13SBarry Smith 3004273d9f13SBarry Smith PetscFunctionBegin; 3005b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 300682502324SSatish Balay B->data = (void*)b; 300782502324SSatish Balay 3008273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3009273d9f13SBarry Smith B->assembled = PETSC_FALSE; 3010273d9f13SBarry Smith 3011273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 3012ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3013ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3014273d9f13SBarry Smith 3015273d9f13SBarry Smith /* build local table of row and column ownerships */ 3016785e854fSJed Brown ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr); 3017273d9f13SBarry Smith 3018273d9f13SBarry Smith /* build cache for off array entries formed */ 3019ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 302026fbe8dcSKarl Rupp 3021273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 30220298fd71SBarry Smith b->colmap = NULL; 30230298fd71SBarry 
Smith b->garray = NULL; 3024273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 3025273d9f13SBarry Smith 3026273d9f13SBarry Smith /* stuff used in block assembly */ 3027273d9f13SBarry Smith b->barray = 0; 3028273d9f13SBarry Smith 3029273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 3030273d9f13SBarry Smith b->lvec = 0; 3031273d9f13SBarry Smith b->Mvctx = 0; 3032273d9f13SBarry Smith 3033273d9f13SBarry Smith /* stuff for MatGetRow() */ 3034273d9f13SBarry Smith b->rowindices = 0; 3035273d9f13SBarry Smith b->rowvalues = 0; 3036273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 3037273d9f13SBarry Smith 3038273d9f13SBarry Smith /* hash table stuff */ 3039273d9f13SBarry Smith b->ht = 0; 3040273d9f13SBarry Smith b->hd = 0; 3041273d9f13SBarry Smith b->ht_size = 0; 3042273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 3043273d9f13SBarry Smith b->ht_fact = 0; 3044273d9f13SBarry Smith b->ht_total_ct = 0; 3045273d9f13SBarry Smith b->ht_insert_ct = 0; 3046273d9f13SBarry Smith 30477a868f3eSHong Zhang /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 30487a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 30497a868f3eSHong Zhang 3050ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 30510298fd71SBarry Smith ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr); 3052273d9f13SBarry Smith if (flg) { 3053f6275e2eSBarry Smith PetscReal fact = 1.39; 30544e0d8c25SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 30550298fd71SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 3056273d9f13SBarry Smith if (fact <= 1.0) fact = 1.39; 3057273d9f13SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 30581e2582c4SBarry Smith ierr = PetscInfo1(B,"Hash table 
Factor used %5.2f\n",fact);CHKERRQ(ierr); 3059273d9f13SBarry Smith } 30608c07d4e3SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 30618c07d4e3SBarry Smith 3062450b117fSShri Abhyankar #if defined(PETSC_HAVE_MUMPS) 3063bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr); 3064450b117fSShri Abhyankar #endif 3065bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3066bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3067bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3068bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3069bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3070bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3071bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3072bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3073bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3074bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3075bdf89e91SBarry Smith ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 307617667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 3077273d9f13SBarry Smith PetscFunctionReturn(0); 3078273d9f13SBarry Smith } 3079273d9f13SBarry Smith 3080209238afSKris Buschelman /*MC 3081002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3082209238afSKris Buschelman 3083209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3084209238afSKris Buschelman and MATMPIBAIJ otherwise. 3085209238afSKris Buschelman 3086209238afSKris Buschelman Options Database Keys: 3087209238afSKris Buschelman . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3088209238afSKris Buschelman 3089209238afSKris Buschelman Level: beginner 3090209238afSKris Buschelman 309169b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3092209238afSKris Buschelman M*/ 3093209238afSKris Buschelman 30944a2ae208SSatish Balay #undef __FUNCT__ 30954a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3096273d9f13SBarry Smith /*@C 3097aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3098273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 3099273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 3100273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3101273d9f13SBarry Smith performance can be increased by more than a factor of 50. 
3102273d9f13SBarry Smith 3103273d9f13SBarry Smith Collective on Mat 3104273d9f13SBarry Smith 3105273d9f13SBarry Smith Input Parameters: 31061c4f3114SJed Brown + B - the matrix 3107e8271787SHong Zhang . bs - size of block 3108273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 3109273d9f13SBarry Smith submatrix (same for all local rows) 3110273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 3111273d9f13SBarry Smith in the diagonal portion of the local submatrix (possibly different for each block 31120298fd71SBarry Smith row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 311395742e49SBarry Smith set it even if it is zero. 3114273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3115273d9f13SBarry Smith submatrix (same for all local rows). 3116273d9f13SBarry Smith - o_nnz - array containing the number of block nonzeros in the various block rows of the 3117273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 31180298fd71SBarry Smith each block row) or NULL. 3119273d9f13SBarry Smith 312049a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 3121273d9f13SBarry Smith 3122273d9f13SBarry Smith Options Database Keys: 31238c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 31248c07d4e3SBarry Smith - -mat_use_hash_table <fact> 3125273d9f13SBarry Smith 3126273d9f13SBarry Smith Notes: 3127273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3128273d9f13SBarry Smith then it must be used on all processors that share the object for that argument.
3129273d9f13SBarry Smith 3130273d9f13SBarry Smith Storage Information: 3131273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 3132273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 3133273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 3134273d9f13SBarry Smith local matrix (a rectangular submatrix). 3135273d9f13SBarry Smith 3136273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 3137273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 31380298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3139273d9f13SBarry Smith memory allocation. Likewise, specify preallocated storage for the 3140273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3141273d9f13SBarry Smith 3142273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3143273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 3144273d9f13SBarry Smith 3145273d9f13SBarry Smith .vb 3146273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 3147a4b1a0f6SJed Brown -------------------------- 3148273d9f13SBarry Smith row 3 |o o o d d d o o o o o o 3149273d9f13SBarry Smith row 4 |o o o d d d o o o o o o 3150273d9f13SBarry Smith row 5 |o o o d d d o o o o o o 3151a4b1a0f6SJed Brown -------------------------- 3152273d9f13SBarry Smith .ve 3153273d9f13SBarry Smith 3154273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 3155273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 3156273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 3157273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 
3158273d9f13SBarry Smith 3159273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3160273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 3161273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 3162273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3163273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 3164273d9f13SBarry Smith matrices. 3165273d9f13SBarry Smith 3166aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3167aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3168aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3169aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3170aa95bbe8SBarry Smith 3171273d9f13SBarry Smith Level: intermediate 3172273d9f13SBarry Smith 3173273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 3174273d9f13SBarry Smith 3175ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3176273d9f13SBarry Smith @*/ 31777087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3178273d9f13SBarry Smith { 31794ac538c5SBarry Smith PetscErrorCode ierr; 3180273d9f13SBarry Smith 3181273d9f13SBarry Smith PetscFunctionBegin; 31826ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 31836ba663aaSJed Brown PetscValidType(B,1); 31846ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 31854ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const 
PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3186273d9f13SBarry Smith PetscFunctionReturn(0); 3187273d9f13SBarry Smith } 3188273d9f13SBarry Smith 31894a2ae208SSatish Balay #undef __FUNCT__ 319069b1f4b7SBarry Smith #define __FUNCT__ "MatCreateBAIJ" 319179bdfe76SSatish Balay /*@C 319269b1f4b7SBarry Smith MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 319379bdfe76SSatish Balay (block compressed row). For good matrix assembly performance 319479bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 319579bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 319679bdfe76SSatish Balay performance can be increased by more than a factor of 50. 319779bdfe76SSatish Balay 3198db81eaa0SLois Curfman McInnes Collective on MPI_Comm 3199db81eaa0SLois Curfman McInnes 320079bdfe76SSatish Balay Input Parameters: 3201db81eaa0SLois Curfman McInnes + comm - MPI communicator 320279bdfe76SSatish Balay . bs - size of block 320379bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 320492e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 320592e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 320692e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 320792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 320892e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 3209be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 3210be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 321147a75d0bSBarry Smith .
d_nz - number of nonzero blocks per block row in diagonal portion of local 321279bdfe76SSatish Balay submatrix (same for all local rows) 321347a75d0bSBarry Smith . d_nnz - array containing the number of nonzero blocks in the various block rows 321492e8d321SLois Curfman McInnes in the diagonal portion of the local submatrix (possibly different for each block 32150298fd71SBarry Smith row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 321695742e49SBarry Smith and set it even if it is zero. 321747a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 321879bdfe76SSatish Balay submatrix (same for all local rows). 321947a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 322092e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 32210298fd71SBarry Smith each block row) or NULL. 322279bdfe76SSatish Balay 322379bdfe76SSatish Balay Output Parameter: 322479bdfe76SSatish Balay . A - the matrix 322579bdfe76SSatish Balay 3226db81eaa0SLois Curfman McInnes Options Database Keys: 32278c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 32288c07d4e3SBarry Smith - -mat_use_hash_table <fact> 32293ffaccefSLois Curfman McInnes 3230175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3231ae1d86c5SBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly.
3232175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3233175b88e8SBarry Smith 3234b259b22eSLois Curfman McInnes Notes: 323549a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 323649a6f317SBarry Smith 323747a75d0bSBarry Smith A nonzero block is any block that has 1 or more nonzeros in it 323847a75d0bSBarry Smith 323979bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 324079bdfe76SSatish Balay (possibly both). 324179bdfe76SSatish Balay 3242be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3243be79a94dSBarry Smith then it must be used on all processors that share the object for that argument. 3244be79a94dSBarry Smith 324579bdfe76SSatish Balay Storage Information: 324679bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 324779bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 324879bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 324979bdfe76SSatish Balay local matrix (a rectangular submatrix). 325079bdfe76SSatish Balay 325179bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 325279bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 32530298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 325479bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 325579bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 325679bdfe76SSatish Balay 325779bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 325879bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11).
325979bdfe76SSatish Balay 3260db81eaa0SLois Curfman McInnes .vb 3261db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 3262a4b1a0f6SJed Brown -------------------------- 3263db81eaa0SLois Curfman McInnes row 3 |o o o d d d o o o o o o 3264db81eaa0SLois Curfman McInnes row 4 |o o o d d d o o o o o o 3265db81eaa0SLois Curfman McInnes row 5 |o o o d d d o o o o o o 3266a4b1a0f6SJed Brown -------------------------- 3267db81eaa0SLois Curfman McInnes .ve 326879bdfe76SSatish Balay 326979bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 327079bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 327179bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 327257b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 327379bdfe76SSatish Balay 3274d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3275d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 327679bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 327792e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 327892e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 32796da5968aSLois Curfman McInnes matrices. 
328079bdfe76SSatish Balay 3281027ccd11SLois Curfman McInnes Level: intermediate 3282027ccd11SLois Curfman McInnes 328392e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 328479bdfe76SSatish Balay 328569b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 328679bdfe76SSatish Balay @*/ 328769b1f4b7SBarry Smith PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 328879bdfe76SSatish Balay { 32896849ba73SBarry Smith PetscErrorCode ierr; 3290b24ad042SBarry Smith PetscMPIInt size; 329179bdfe76SSatish Balay 3292d64ed03dSBarry Smith PetscFunctionBegin; 3293f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3294f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3295d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3296273d9f13SBarry Smith if (size > 1) { 3297273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3298273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3299273d9f13SBarry Smith } else { 3300273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3301273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 33023914022bSBarry Smith } 33033a40ed3dSBarry Smith PetscFunctionReturn(0); 330479bdfe76SSatish Balay } 3305026e39d0SSatish Balay 33064a2ae208SSatish Balay #undef __FUNCT__ 33074a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 33086849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 33090ac07820SSatish Balay { 33100ac07820SSatish Balay Mat mat; 33110ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3312dfbe8321SBarry Smith PetscErrorCode 
ierr; 3313b24ad042SBarry Smith PetscInt len=0; 33140ac07820SSatish Balay 3315d64ed03dSBarry Smith PetscFunctionBegin; 33160ac07820SSatish Balay *newmat = 0; 3317ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3318d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 33197adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 33201d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 33217fff6886SHong Zhang 3322d5f3da31SBarry Smith mat->factortype = matin->factortype; 3323273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 33240ac07820SSatish Balay mat->assembled = PETSC_TRUE; 33257fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 33267fff6886SHong Zhang 3327273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 3328d0f46423SBarry Smith mat->rmap->bs = matin->rmap->bs; 33290ac07820SSatish Balay a->bs2 = oldmat->bs2; 33300ac07820SSatish Balay a->mbs = oldmat->mbs; 33310ac07820SSatish Balay a->nbs = oldmat->nbs; 33320ac07820SSatish Balay a->Mbs = oldmat->Mbs; 33330ac07820SSatish Balay a->Nbs = oldmat->Nbs; 33340ac07820SSatish Balay 33351e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 33361e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3337899cda47SBarry Smith 33380ac07820SSatish Balay a->size = oldmat->size; 33390ac07820SSatish Balay a->rank = oldmat->rank; 3340aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 3341aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 3342aef5e8e0SSatish Balay a->rowindices = 0; 33430ac07820SSatish Balay a->rowvalues = 0; 33440ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 334530793edcSSatish Balay a->barray = 0; 3346899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 3347899cda47SBarry Smith a->rendbs = oldmat->rendbs; 3348899cda47SBarry Smith 
a->cstartbs = oldmat->cstartbs; 3349899cda47SBarry Smith a->cendbs = oldmat->cendbs; 33500ac07820SSatish Balay 3351133cdb44SSatish Balay /* hash table stuff */ 3352133cdb44SSatish Balay a->ht = 0; 3353133cdb44SSatish Balay a->hd = 0; 3354133cdb44SSatish Balay a->ht_size = 0; 3355133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 335625fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 3357133cdb44SSatish Balay a->ht_total_ct = 0; 3358133cdb44SSatish Balay a->ht_insert_ct = 0; 3359133cdb44SSatish Balay 3360899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 33610ac07820SSatish Balay if (oldmat->colmap) { 3362aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 33630f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 336448e59246SSatish Balay #else 3365785e854fSJed Brown ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr); 33663bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3367b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 336848e59246SSatish Balay #endif 33690ac07820SSatish Balay } else a->colmap = 0; 33704beb1cfeSHong Zhang 33710ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3372785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 33733bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3374b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 33750ac07820SSatish Balay } else a->garray = 0; 33760ac07820SSatish Balay 3377ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 33780ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 33793bb1ff40SBarry Smith ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 33800ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 33813bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 33827fff6886SHong Zhang 33832e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 33843bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 33852e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 33863bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3387140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 33880ac07820SSatish Balay *newmat = mat; 33893a40ed3dSBarry Smith PetscFunctionReturn(0); 33900ac07820SSatish Balay } 339157b952d6SSatish Balay 33924a2ae208SSatish Balay #undef __FUNCT__ 33935bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_MPIBAIJ" 3394112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 33954683f7a4SShri Abhyankar { 33964683f7a4SShri Abhyankar PetscErrorCode ierr; 33974683f7a4SShri Abhyankar int fd; 33984683f7a4SShri Abhyankar PetscInt i,nz,j,rstart,rend; 33994683f7a4SShri Abhyankar PetscScalar *vals,*buf; 3400ce94432eSBarry Smith MPI_Comm comm; 34014683f7a4SShri Abhyankar MPI_Status status; 34024683f7a4SShri Abhyankar PetscMPIInt rank,size,maxnz; 34034683f7a4SShri Abhyankar PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 34040298fd71SBarry Smith PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 34053059b6faSBarry Smith PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 34064683f7a4SShri Abhyankar PetscMPIInt tag = ((PetscObject)viewer)->tag; 34070298fd71SBarry Smith PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 
34084683f7a4SShri Abhyankar PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 34094683f7a4SShri Abhyankar 34104683f7a4SShri Abhyankar PetscFunctionBegin; 3411ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 34120298fd71SBarry Smith ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 34130298fd71SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 34144683f7a4SShri Abhyankar ierr = PetscOptionsEnd();CHKERRQ(ierr); 34153059b6faSBarry Smith if (bs < 0) bs = 1; 34164683f7a4SShri Abhyankar 34174683f7a4SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 34184683f7a4SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 34194683f7a4SShri Abhyankar if (!rank) { 34204683f7a4SShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 34214683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 34224683f7a4SShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 34234683f7a4SShri Abhyankar } 34244683f7a4SShri Abhyankar 34254683f7a4SShri Abhyankar if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 34264683f7a4SShri Abhyankar 34274683f7a4SShri Abhyankar ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 34284683f7a4SShri Abhyankar M = header[1]; N = header[2]; 34294683f7a4SShri Abhyankar 34304683f7a4SShri Abhyankar /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 34314683f7a4SShri Abhyankar if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 34324683f7a4SShri Abhyankar if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N; 34334683f7a4SShri Abhyankar 34344683f7a4SShri Abhyankar /* If global sizes are set, check if they are consistent with 
that given in the file */ 34354683f7a4SShri Abhyankar if (sizesset) { 34364683f7a4SShri Abhyankar ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 34374683f7a4SShri Abhyankar } 3438abd38a8fSBarry Smith if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3439abd38a8fSBarry Smith if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 34404683f7a4SShri Abhyankar 3441ce94432eSBarry Smith if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 34424683f7a4SShri Abhyankar 34434683f7a4SShri Abhyankar /* 34444683f7a4SShri Abhyankar This code adds extra rows to make sure the number of rows is 34454683f7a4SShri Abhyankar divisible by the blocksize 34464683f7a4SShri Abhyankar */ 34474683f7a4SShri Abhyankar Mbs = M/bs; 34484683f7a4SShri Abhyankar extra_rows = bs - M + bs*Mbs; 34494683f7a4SShri Abhyankar if (extra_rows == bs) extra_rows = 0; 34504683f7a4SShri Abhyankar else Mbs++; 34514683f7a4SShri Abhyankar if (extra_rows && !rank) { 34524683f7a4SShri Abhyankar ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 34534683f7a4SShri Abhyankar } 34544683f7a4SShri Abhyankar 34554683f7a4SShri Abhyankar /* determine ownership of all rows */ 34564683f7a4SShri Abhyankar if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 34574683f7a4SShri Abhyankar mbs = Mbs/size + ((Mbs % size) > rank); 34584683f7a4SShri Abhyankar m = mbs*bs; 34594683f7a4SShri Abhyankar } else { /* User set */ 34604683f7a4SShri Abhyankar m = newmat->rmap->n; 34614683f7a4SShri Abhyankar mbs = m/bs; 34624683f7a4SShri Abhyankar } 3463dcca6d9dSJed Brown ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 34644683f7a4SShri Abhyankar ierr = 
MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 34654683f7a4SShri Abhyankar 34664683f7a4SShri Abhyankar /* process 0 needs enough room for process with most rows */ 34674683f7a4SShri Abhyankar if (!rank) { 34684683f7a4SShri Abhyankar mmax = rowners[1]; 34691251c579SMatthew G Knepley for (i=2; i<=size; i++) { 34704683f7a4SShri Abhyankar mmax = PetscMax(mmax,rowners[i]); 34714683f7a4SShri Abhyankar } 34724683f7a4SShri Abhyankar mmax*=bs; 34733964eb88SJed Brown } else mmax = -1; /* unused, but compiler warns anyway */ 34744683f7a4SShri Abhyankar 34754683f7a4SShri Abhyankar rowners[0] = 0; 34764683f7a4SShri Abhyankar for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 34774683f7a4SShri Abhyankar for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 34784683f7a4SShri Abhyankar rstart = rowners[rank]; 34794683f7a4SShri Abhyankar rend = rowners[rank+1]; 34804683f7a4SShri Abhyankar 34814683f7a4SShri Abhyankar /* distribute row lengths to all processors */ 3482785e854fSJed Brown ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 34834683f7a4SShri Abhyankar if (!rank) { 34844683f7a4SShri Abhyankar mend = m; 34854683f7a4SShri Abhyankar if (size == 1) mend = mend - extra_rows; 34864683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 34874683f7a4SShri Abhyankar for (j=mend; j<m; j++) locrowlens[j] = 1; 3488785e854fSJed Brown ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 34891795a4d1SJed Brown ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 34904683f7a4SShri Abhyankar for (j=0; j<m; j++) { 34914683f7a4SShri Abhyankar procsnz[0] += locrowlens[j]; 34924683f7a4SShri Abhyankar } 34934683f7a4SShri Abhyankar for (i=1; i<size; i++) { 34944683f7a4SShri Abhyankar mend = browners[i+1] - browners[i]; 34954683f7a4SShri Abhyankar if (i == size-1) mend = mend - extra_rows; 34964683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 34974683f7a4SShri Abhyankar for (j=mend; j<browners[i+1] 
- browners[i]; j++) rowlengths[j] = 1; 34984683f7a4SShri Abhyankar /* calculate the number of nonzeros on each processor */ 34994683f7a4SShri Abhyankar for (j=0; j<browners[i+1]-browners[i]; j++) { 35004683f7a4SShri Abhyankar procsnz[i] += rowlengths[j]; 35014683f7a4SShri Abhyankar } 35024683f7a4SShri Abhyankar ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 35034683f7a4SShri Abhyankar } 35044683f7a4SShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 35054683f7a4SShri Abhyankar } else { 35064683f7a4SShri Abhyankar ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 35074683f7a4SShri Abhyankar } 35084683f7a4SShri Abhyankar 35094683f7a4SShri Abhyankar if (!rank) { 35104683f7a4SShri Abhyankar /* determine max buffer needed and allocate it */ 35114683f7a4SShri Abhyankar maxnz = procsnz[0]; 35124683f7a4SShri Abhyankar for (i=1; i<size; i++) { 35134683f7a4SShri Abhyankar maxnz = PetscMax(maxnz,procsnz[i]); 35144683f7a4SShri Abhyankar } 3515785e854fSJed Brown ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 35164683f7a4SShri Abhyankar 35174683f7a4SShri Abhyankar /* read in my part of the matrix column indices */ 35184683f7a4SShri Abhyankar nz = procsnz[0]; 3519785e854fSJed Brown ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 35204683f7a4SShri Abhyankar mycols = ibuf; 35214683f7a4SShri Abhyankar if (size == 1) nz -= extra_rows; 35224683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 352326fbe8dcSKarl Rupp if (size == 1) { 352426fbe8dcSKarl Rupp for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 352526fbe8dcSKarl Rupp } 35264683f7a4SShri Abhyankar 35274683f7a4SShri Abhyankar /* read in every ones (except the last) and ship off */ 35284683f7a4SShri Abhyankar for (i=1; i<size-1; i++) { 35294683f7a4SShri Abhyankar nz = procsnz[i]; 35304683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 35314683f7a4SShri Abhyankar ierr = 
MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 35324683f7a4SShri Abhyankar } 35334683f7a4SShri Abhyankar /* read in the stuff for the last proc */ 35344683f7a4SShri Abhyankar if (size != 1) { 35354683f7a4SShri Abhyankar nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 35364683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 35374683f7a4SShri Abhyankar for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 35384683f7a4SShri Abhyankar ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 35394683f7a4SShri Abhyankar } 35404683f7a4SShri Abhyankar ierr = PetscFree(cols);CHKERRQ(ierr); 35414683f7a4SShri Abhyankar } else { 35424683f7a4SShri Abhyankar /* determine buffer space needed for message */ 35434683f7a4SShri Abhyankar nz = 0; 35444683f7a4SShri Abhyankar for (i=0; i<m; i++) { 35454683f7a4SShri Abhyankar nz += locrowlens[i]; 35464683f7a4SShri Abhyankar } 3547785e854fSJed Brown ierr = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr); 35484683f7a4SShri Abhyankar mycols = ibuf; 35494683f7a4SShri Abhyankar /* receive message of column indices*/ 35504683f7a4SShri Abhyankar ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 35514683f7a4SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 35524683f7a4SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 35534683f7a4SShri Abhyankar } 35544683f7a4SShri Abhyankar 35554683f7a4SShri Abhyankar /* loop over local rows, determining number of off diagonal entries */ 3556dcca6d9dSJed Brown ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 35571795a4d1SJed Brown ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 35584683f7a4SShri Abhyankar rowcount = 0; nzcount = 0; 35594683f7a4SShri Abhyankar for (i=0; i<mbs; i++) { 35604683f7a4SShri Abhyankar dcount = 0; 35614683f7a4SShri Abhyankar odcount = 0; 35624683f7a4SShri 
Abhyankar for (j=0; j<bs; j++) { 35634683f7a4SShri Abhyankar kmax = locrowlens[rowcount]; 35644683f7a4SShri Abhyankar for (k=0; k<kmax; k++) { 35654683f7a4SShri Abhyankar tmp = mycols[nzcount++]/bs; 35664683f7a4SShri Abhyankar if (!mask[tmp]) { 35674683f7a4SShri Abhyankar mask[tmp] = 1; 35684683f7a4SShri Abhyankar if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 35694683f7a4SShri Abhyankar else masked1[dcount++] = tmp; 35704683f7a4SShri Abhyankar } 35714683f7a4SShri Abhyankar } 35724683f7a4SShri Abhyankar rowcount++; 35734683f7a4SShri Abhyankar } 35744683f7a4SShri Abhyankar 35754683f7a4SShri Abhyankar dlens[i] = dcount; 35764683f7a4SShri Abhyankar odlens[i] = odcount; 35774683f7a4SShri Abhyankar 35784683f7a4SShri Abhyankar /* zero out the mask elements we set */ 35794683f7a4SShri Abhyankar for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 35804683f7a4SShri Abhyankar for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 35814683f7a4SShri Abhyankar } 35824683f7a4SShri Abhyankar 35834683f7a4SShri Abhyankar 35844683f7a4SShri Abhyankar if (!sizesset) { 35854683f7a4SShri Abhyankar ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 35864683f7a4SShri Abhyankar } 35874683f7a4SShri Abhyankar ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 35884683f7a4SShri Abhyankar 35894683f7a4SShri Abhyankar if (!rank) { 3590785e854fSJed Brown ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr); 35914683f7a4SShri Abhyankar /* read in my part of the matrix numerical values */ 35924683f7a4SShri Abhyankar nz = procsnz[0]; 35934683f7a4SShri Abhyankar vals = buf; 35944683f7a4SShri Abhyankar mycols = ibuf; 35954683f7a4SShri Abhyankar if (size == 1) nz -= extra_rows; 35964683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 359726fbe8dcSKarl Rupp if (size == 1) { 359826fbe8dcSKarl Rupp for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0; 359926fbe8dcSKarl Rupp } 36004683f7a4SShri Abhyankar 36014683f7a4SShri Abhyankar /* 
insert into matrix */ 36024683f7a4SShri Abhyankar jj = rstart*bs; 36034683f7a4SShri Abhyankar for (i=0; i<m; i++) { 36044683f7a4SShri Abhyankar ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 36054683f7a4SShri Abhyankar mycols += locrowlens[i]; 36064683f7a4SShri Abhyankar vals += locrowlens[i]; 36074683f7a4SShri Abhyankar jj++; 36084683f7a4SShri Abhyankar } 36094683f7a4SShri Abhyankar /* read in other processors (except the last one) and ship out */ 36104683f7a4SShri Abhyankar for (i=1; i<size-1; i++) { 36114683f7a4SShri Abhyankar nz = procsnz[i]; 36124683f7a4SShri Abhyankar vals = buf; 36134683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3614479e424cSMichael Lange ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36154683f7a4SShri Abhyankar } 36164683f7a4SShri Abhyankar /* the last proc */ 36174683f7a4SShri Abhyankar if (size != 1) { 36184683f7a4SShri Abhyankar nz = procsnz[i] - extra_rows; 36194683f7a4SShri Abhyankar vals = buf; 36204683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 36214683f7a4SShri Abhyankar for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3622479e424cSMichael Lange ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36234683f7a4SShri Abhyankar } 36244683f7a4SShri Abhyankar ierr = PetscFree(procsnz);CHKERRQ(ierr); 36254683f7a4SShri Abhyankar } else { 36264683f7a4SShri Abhyankar /* receive numeric values */ 3627785e854fSJed Brown ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr); 36284683f7a4SShri Abhyankar 36294683f7a4SShri Abhyankar /* receive message of values*/ 36304683f7a4SShri Abhyankar vals = buf; 36314683f7a4SShri Abhyankar mycols = ibuf; 3632479e424cSMichael Lange ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36334683f7a4SShri Abhyankar 36344683f7a4SShri Abhyankar /* insert into 
matrix */ 36354683f7a4SShri Abhyankar jj = rstart*bs; 36364683f7a4SShri Abhyankar for (i=0; i<m; i++) { 36374683f7a4SShri Abhyankar ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 36384683f7a4SShri Abhyankar mycols += locrowlens[i]; 36394683f7a4SShri Abhyankar vals += locrowlens[i]; 36404683f7a4SShri Abhyankar jj++; 36414683f7a4SShri Abhyankar } 36424683f7a4SShri Abhyankar } 36434683f7a4SShri Abhyankar ierr = PetscFree(locrowlens);CHKERRQ(ierr); 36444683f7a4SShri Abhyankar ierr = PetscFree(buf);CHKERRQ(ierr); 36454683f7a4SShri Abhyankar ierr = PetscFree(ibuf);CHKERRQ(ierr); 36464683f7a4SShri Abhyankar ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 36474683f7a4SShri Abhyankar ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 36484683f7a4SShri Abhyankar ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 36494683f7a4SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 36504683f7a4SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 36514683f7a4SShri Abhyankar PetscFunctionReturn(0); 36524683f7a4SShri Abhyankar } 36534683f7a4SShri Abhyankar 36544683f7a4SShri Abhyankar #undef __FUNCT__ 36554a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3656133cdb44SSatish Balay /*@ 3657133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3658133cdb44SSatish Balay 3659133cdb44SSatish Balay Input Parameters: 3660133cdb44SSatish Balay . mat - the matrix 3661133cdb44SSatish Balay . 
fact - factor 3662133cdb44SSatish Balay 3663c5eb9154SBarry Smith Not Collective, each process can use a different factor 3664fee21e36SBarry Smith 36658c890885SBarry Smith Level: advanced 36668c890885SBarry Smith 3667133cdb44SSatish Balay Notes: 36688c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 3669133cdb44SSatish Balay 3670133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 3671133cdb44SSatish Balay 3672133cdb44SSatish Balay .seealso: MatSetOption() 3673133cdb44SSatish Balay @*/ 36747087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3675133cdb44SSatish Balay { 36764ac538c5SBarry Smith PetscErrorCode ierr; 36775bf65638SKris Buschelman 36785bf65638SKris Buschelman PetscFunctionBegin; 36794ac538c5SBarry Smith ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 36805bf65638SKris Buschelman PetscFunctionReturn(0); 36815bf65638SKris Buschelman } 36825bf65638SKris Buschelman 36835bf65638SKris Buschelman #undef __FUNCT__ 36845bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 36857087cfbeSBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 36865bf65638SKris Buschelman { 368725fdafccSSatish Balay Mat_MPIBAIJ *baij; 3688133cdb44SSatish Balay 3689133cdb44SSatish Balay PetscFunctionBegin; 3690133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 3691133cdb44SSatish Balay baij->ht_fact = fact; 3692133cdb44SSatish Balay PetscFunctionReturn(0); 3693133cdb44SSatish Balay } 3694f2a5309cSSatish Balay 36954a2ae208SSatish Balay #undef __FUNCT__ 36964a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 36979230625dSJed Brown PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3698f2a5309cSSatish Balay { 3699f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 37005fd66863SKarl Rupp 3701f2a5309cSSatish Balay PetscFunctionBegin; 370221e72a00SBarry 
Smith if (Ad) *Ad = a->A; 370321e72a00SBarry Smith if (Ao) *Ao = a->B; 370421e72a00SBarry Smith if (colmap) *colmap = a->garray; 3705f2a5309cSSatish Balay PetscFunctionReturn(0); 3706f2a5309cSSatish Balay } 370785535b8eSBarry Smith 370885535b8eSBarry Smith /* 370985535b8eSBarry Smith Special version for direct calls from Fortran (to eliminate two function call overheads 371085535b8eSBarry Smith */ 371185535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 371285535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 371385535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 371485535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 371585535b8eSBarry Smith #endif 371685535b8eSBarry Smith 371785535b8eSBarry Smith #undef __FUNCT__ 371885535b8eSBarry Smith #define __FUNCT__ "matmpibiajsetvaluesblocked" 371985535b8eSBarry Smith /*@C 372085535b8eSBarry Smith MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 372185535b8eSBarry Smith 372285535b8eSBarry Smith Collective on Mat 372385535b8eSBarry Smith 372485535b8eSBarry Smith Input Parameters: 372585535b8eSBarry Smith + mat - the matrix 372685535b8eSBarry Smith . min - number of input rows 372785535b8eSBarry Smith . im - input rows 372885535b8eSBarry Smith . nin - number of input columns 372985535b8eSBarry Smith . in - input columns 373085535b8eSBarry Smith . v - numerical values input 373185535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES 373285535b8eSBarry Smith 373385535b8eSBarry Smith Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
373485535b8eSBarry Smith 373585535b8eSBarry Smith Level: advanced 373685535b8eSBarry Smith 373785535b8eSBarry Smith .seealso: MatSetValuesBlocked() 373885535b8eSBarry Smith @*/ 373985535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 374085535b8eSBarry Smith { 374185535b8eSBarry Smith /* convert input arguments to C version */ 374285535b8eSBarry Smith Mat mat = *matin; 374385535b8eSBarry Smith PetscInt m = *min, n = *nin; 374485535b8eSBarry Smith InsertMode addv = *addvin; 374585535b8eSBarry Smith 374685535b8eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 374785535b8eSBarry Smith const MatScalar *value; 374885535b8eSBarry Smith MatScalar *barray = baij->barray; 3749ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 375085535b8eSBarry Smith PetscErrorCode ierr; 375185535b8eSBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 375285535b8eSBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3753d0f46423SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 375485535b8eSBarry Smith 375585535b8eSBarry Smith PetscFunctionBegin; 375685535b8eSBarry Smith /* tasks normally handled by MatSetValuesBlocked() */ 375726fbe8dcSKarl Rupp if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 375885535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3759e7e72b3dSBarry Smith else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3760e32f2f54SBarry Smith if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 376185535b8eSBarry Smith #endif 376285535b8eSBarry Smith if (mat->assembled) { 376385535b8eSBarry Smith mat->was_assembled = PETSC_TRUE; 376485535b8eSBarry Smith mat->assembled = PETSC_FALSE; 376585535b8eSBarry Smith } 376685535b8eSBarry Smith ierr = 
PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 376785535b8eSBarry Smith 376885535b8eSBarry Smith 376985535b8eSBarry Smith if (!barray) { 3770785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 377185535b8eSBarry Smith baij->barray = barray; 377285535b8eSBarry Smith } 377385535b8eSBarry Smith 377426fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 377526fbe8dcSKarl Rupp else stepval = (m-1)*bs; 377626fbe8dcSKarl Rupp 377785535b8eSBarry Smith for (i=0; i<m; i++) { 377885535b8eSBarry Smith if (im[i] < 0) continue; 377985535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3780e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 378185535b8eSBarry Smith #endif 378285535b8eSBarry Smith if (im[i] >= rstart && im[i] < rend) { 378385535b8eSBarry Smith row = im[i] - rstart; 378485535b8eSBarry Smith for (j=0; j<n; j++) { 378585535b8eSBarry Smith /* If NumCol = 1 then a copy is not required */ 378685535b8eSBarry Smith if ((roworiented) && (n == 1)) { 378785535b8eSBarry Smith barray = (MatScalar*)v + i*bs2; 378885535b8eSBarry Smith } else if ((!roworiented) && (m == 1)) { 378985535b8eSBarry Smith barray = (MatScalar*)v + j*bs2; 379085535b8eSBarry Smith } else { /* Here a copy is required */ 379185535b8eSBarry Smith if (roworiented) { 379285535b8eSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 379385535b8eSBarry Smith } else { 379485535b8eSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 379585535b8eSBarry Smith } 379685535b8eSBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 379785535b8eSBarry Smith for (jj=0; jj<bs; jj++) { 379885535b8eSBarry Smith *barray++ = *value++; 379985535b8eSBarry Smith } 380085535b8eSBarry Smith } 380185535b8eSBarry Smith barray -=bs2; 380285535b8eSBarry Smith } 380385535b8eSBarry Smith 380485535b8eSBarry Smith if (in[j] >= cstart && in[j] < cend) { 380585535b8eSBarry Smith col = in[j] - cstart; 380697e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 380726fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 380885535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3809cb9801acSJed Brown else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 381085535b8eSBarry Smith #endif 381185535b8eSBarry Smith else { 381285535b8eSBarry Smith if (mat->was_assembled) { 381385535b8eSBarry Smith if (!baij->colmap) { 3814ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 381585535b8eSBarry Smith } 381685535b8eSBarry Smith 381785535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 381885535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 381985535b8eSBarry Smith { PetscInt data; 382085535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3821e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 382285535b8eSBarry Smith } 382385535b8eSBarry Smith #else 3824e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 382585535b8eSBarry Smith #endif 382685535b8eSBarry Smith #endif 382785535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 382885535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 382985535b8eSBarry Smith col = (col - 1)/bs; 383085535b8eSBarry Smith #else 383185535b8eSBarry Smith col = (baij->colmap[in[j]] - 1)/bs; 383285535b8eSBarry Smith #endif 383385535b8eSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3834ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 383585535b8eSBarry Smith col = in[j]; 383685535b8eSBarry Smith } 383726fbe8dcSKarl Rupp } else col = in[j]; 383897e5c40aSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 383985535b8eSBarry Smith } 384085535b8eSBarry Smith } 384185535b8eSBarry Smith } 
else { 384285535b8eSBarry Smith if (!baij->donotstash) { 384385535b8eSBarry Smith if (roworiented) { 384485535b8eSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 384585535b8eSBarry Smith } else { 384685535b8eSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 384785535b8eSBarry Smith } 384885535b8eSBarry Smith } 384985535b8eSBarry Smith } 385085535b8eSBarry Smith } 385185535b8eSBarry Smith 385285535b8eSBarry Smith /* task normally handled by MatSetValuesBlocked() */ 385385535b8eSBarry Smith ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 385485535b8eSBarry Smith PetscFunctionReturn(0); 385585535b8eSBarry Smith } 3856dfb205c3SBarry Smith 3857dfb205c3SBarry Smith #undef __FUNCT__ 3858dfb205c3SBarry Smith #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3859dfb205c3SBarry Smith /*@ 3860dfb205c3SBarry Smith MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3861dfb205c3SBarry Smith CSR format the local rows. 3862dfb205c3SBarry Smith 3863dfb205c3SBarry Smith Collective on MPI_Comm 3864dfb205c3SBarry Smith 3865dfb205c3SBarry Smith Input Parameters: 3866dfb205c3SBarry Smith + comm - MPI communicator 3867dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 3868dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 3869dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 3870dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3871dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 3872dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3873dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3874dfb205c3SBarry Smith . 
i - row indices 3875dfb205c3SBarry Smith . j - column indices 3876dfb205c3SBarry Smith - a - matrix values 3877dfb205c3SBarry Smith 3878dfb205c3SBarry Smith Output Parameter: 3879dfb205c3SBarry Smith . mat - the matrix 3880dfb205c3SBarry Smith 3881dfb205c3SBarry Smith Level: intermediate 3882dfb205c3SBarry Smith 3883dfb205c3SBarry Smith Notes: 3884dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3885dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 3886dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3887dfb205c3SBarry Smith 38883adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 38893adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 38903adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 38913adadaf3SJed Brown with column-major ordering within blocks. 38923adadaf3SJed Brown 3893dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3894dfb205c3SBarry Smith 3895dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3896dfb205c3SBarry Smith 3897dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 389869b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3899dfb205c3SBarry Smith @*/ 39007087cfbeSBarry Smith PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3901dfb205c3SBarry Smith { 3902dfb205c3SBarry Smith PetscErrorCode ierr; 3903dfb205c3SBarry Smith 3904dfb205c3SBarry Smith PetscFunctionBegin; 3905f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3906dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3907dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3908dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3909dfb205c3SBarry Smith ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3910d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3911dfb205c3SBarry Smith ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3912d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3913dfb205c3SBarry Smith PetscFunctionReturn(0); 3914dfb205c3SBarry Smith } 3915