179bdfe76SSatish Balay 2c6db04a5SJed Brown #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 3c5d9258eSSatish Balay 4c6db04a5SJed Brown #include <petscblaslapack.h> 565a92638SMatthew G. Knepley #include <petscsf.h> 679bdfe76SSatish Balay 74a2ae208SSatish Balay #undef __FUNCT__ 8985db425SBarry Smith #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ" 9985db425SBarry Smith PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[]) 107843d17aSBarry Smith { 117843d17aSBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 12dfbe8321SBarry Smith PetscErrorCode ierr; 13985db425SBarry Smith PetscInt i,*idxb = 0; 1487828ca2SBarry Smith PetscScalar *va,*vb; 157843d17aSBarry Smith Vec vtmp; 167843d17aSBarry Smith 177843d17aSBarry Smith PetscFunctionBegin; 18985db425SBarry Smith ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 191ebc52fbSHong Zhang ierr = VecGetArray(v,&va);CHKERRQ(ierr); 20985db425SBarry Smith if (idx) { 2126fbe8dcSKarl Rupp for (i=0; i<A->rmap->n; i++) { 2226fbe8dcSKarl Rupp if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2326fbe8dcSKarl Rupp } 24985db425SBarry Smith } 257843d17aSBarry Smith 26d0f46423SBarry Smith ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 27785e854fSJed Brown if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);} 28985db425SBarry Smith ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 291ebc52fbSHong Zhang ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 307843d17aSBarry Smith 31d0f46423SBarry Smith for (i=0; i<A->rmap->n; i++) { 3226fbe8dcSKarl Rupp if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 3326fbe8dcSKarl Rupp va[i] = vb[i]; 3426fbe8dcSKarl Rupp if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs); 3526fbe8dcSKarl Rupp } 367843d17aSBarry Smith } 377843d17aSBarry Smith 381ebc52fbSHong Zhang ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 391ebc52fbSHong Zhang ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 40c31cb41cSBarry Smith ierr = 
PetscFree(idxb);CHKERRQ(ierr); 416bf464f9SBarry Smith ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 427843d17aSBarry Smith PetscFunctionReturn(0); 437843d17aSBarry Smith } 447843d17aSBarry Smith 454a2ae208SSatish Balay #undef __FUNCT__ 464a2ae208SSatish Balay #define __FUNCT__ "MatStoreValues_MPIBAIJ" 477087cfbeSBarry Smith PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 487fc3c18eSBarry Smith { 497fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 50dfbe8321SBarry Smith PetscErrorCode ierr; 517fc3c18eSBarry Smith 527fc3c18eSBarry Smith PetscFunctionBegin; 537fc3c18eSBarry Smith ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 547fc3c18eSBarry Smith ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 557fc3c18eSBarry Smith PetscFunctionReturn(0); 567fc3c18eSBarry Smith } 577fc3c18eSBarry Smith 584a2ae208SSatish Balay #undef __FUNCT__ 594a2ae208SSatish Balay #define __FUNCT__ "MatRetrieveValues_MPIBAIJ" 607087cfbeSBarry Smith PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 617fc3c18eSBarry Smith { 627fc3c18eSBarry Smith Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)mat->data; 63dfbe8321SBarry Smith PetscErrorCode ierr; 647fc3c18eSBarry Smith 657fc3c18eSBarry Smith PetscFunctionBegin; 667fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 677fc3c18eSBarry Smith ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 687fc3c18eSBarry Smith PetscFunctionReturn(0); 697fc3c18eSBarry Smith } 707fc3c18eSBarry Smith 71537820f0SBarry Smith /* 72537820f0SBarry Smith Local utility routine that creates a mapping from the global column 7357b952d6SSatish Balay number to the local number in the off-diagonal part of the local 74e06f6af7SJed Brown storage of the matrix. This is done in a non scalable way since the 7557b952d6SSatish Balay length of colmap equals the global matrix length. 
7657b952d6SSatish Balay */ 774a2ae208SSatish Balay #undef __FUNCT__ 78ab9863d7SBarry Smith #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private" 79ab9863d7SBarry Smith PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 8057b952d6SSatish Balay { 8157b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 8257b952d6SSatish Balay Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 836849ba73SBarry Smith PetscErrorCode ierr; 84d0f46423SBarry Smith PetscInt nbs = B->nbs,i,bs=mat->rmap->bs; 8557b952d6SSatish Balay 86d64ed03dSBarry Smith PetscFunctionBegin; 87aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 88e23dfa41SBarry Smith ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 8948e59246SSatish Balay for (i=0; i<nbs; i++) { 903861aac3SJed Brown ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr); 9148e59246SSatish Balay } 9248e59246SSatish Balay #else 93854ce69bSBarry Smith ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr); 943bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 95b24ad042SBarry Smith ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr); 96928fc39bSSatish Balay for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1; 9748e59246SSatish Balay #endif 983a40ed3dSBarry Smith PetscFunctionReturn(0); 9957b952d6SSatish Balay } 10057b952d6SSatish Balay 101f5e9677aSSatish Balay #define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \ 10280c1aa95SSatish Balay { \ 10380c1aa95SSatish Balay \ 10480c1aa95SSatish Balay brow = row/bs; \ 10580c1aa95SSatish Balay rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; \ 106ac7a638eSSatish Balay rmax = aimax[brow]; nrow = ailen[brow]; \ 10780c1aa95SSatish Balay bcol = col/bs; \ 10880c1aa95SSatish Balay ridx = row % bs; cidx = col % bs; \ 109ab26458aSBarry Smith low = 0; high = nrow; \ 110ab26458aSBarry Smith while (high-low > 3) { \ 111ab26458aSBarry Smith t = 
(low+high)/2; \ 112ab26458aSBarry Smith if (rp[t] > bcol) high = t; \ 113ab26458aSBarry Smith else low = t; \ 114ab26458aSBarry Smith } \ 115ab26458aSBarry Smith for (_i=low; _i<high; _i++) { \ 11680c1aa95SSatish Balay if (rp[_i] > bcol) break; \ 11780c1aa95SSatish Balay if (rp[_i] == bcol) { \ 11880c1aa95SSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 119eada6651SSatish Balay if (addv == ADD_VALUES) *bap += value; \ 120eada6651SSatish Balay else *bap = value; \ 121ac7a638eSSatish Balay goto a_noinsert; \ 12280c1aa95SSatish Balay } \ 12380c1aa95SSatish Balay } \ 12489280ab3SLois Curfman McInnes if (a->nonew == 1) goto a_noinsert; \ 125e32f2f54SBarry Smith if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 126fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \ 12780c1aa95SSatish Balay N = nrow++ - 1; \ 12880c1aa95SSatish Balay /* shift up all the later entries in this row */ \ 12980c1aa95SSatish Balay for (ii=N; ii>=_i; ii--) { \ 13080c1aa95SSatish Balay rp[ii+1] = rp[ii]; \ 1313eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 13280c1aa95SSatish Balay } \ 1333eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); } \ 13480c1aa95SSatish Balay rp[_i] = bcol; \ 13580c1aa95SSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 136ac7a638eSSatish Balay a_noinsert:; \ 13780c1aa95SSatish Balay ailen[brow] = nrow; \ 13880c1aa95SSatish Balay } 13957b952d6SSatish Balay 140ac7a638eSSatish Balay #define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \ 141ac7a638eSSatish Balay { \ 142ac7a638eSSatish Balay brow = row/bs; \ 143ac7a638eSSatish Balay rp = bj + bi[brow]; ap = ba + bs2*bi[brow]; \ 144ac7a638eSSatish Balay rmax = bimax[brow]; nrow = bilen[brow]; \ 145ac7a638eSSatish Balay bcol = col/bs; \ 146ac7a638eSSatish Balay 
ridx = row % bs; cidx = col % bs; \ 147ac7a638eSSatish Balay low = 0; high = nrow; \ 148ac7a638eSSatish Balay while (high-low > 3) { \ 149ac7a638eSSatish Balay t = (low+high)/2; \ 150ac7a638eSSatish Balay if (rp[t] > bcol) high = t; \ 151ac7a638eSSatish Balay else low = t; \ 152ac7a638eSSatish Balay } \ 153ac7a638eSSatish Balay for (_i=low; _i<high; _i++) { \ 154ac7a638eSSatish Balay if (rp[_i] > bcol) break; \ 155ac7a638eSSatish Balay if (rp[_i] == bcol) { \ 156ac7a638eSSatish Balay bap = ap + bs2*_i + bs*cidx + ridx; \ 157ac7a638eSSatish Balay if (addv == ADD_VALUES) *bap += value; \ 158ac7a638eSSatish Balay else *bap = value; \ 159ac7a638eSSatish Balay goto b_noinsert; \ 160ac7a638eSSatish Balay } \ 161ac7a638eSSatish Balay } \ 16289280ab3SLois Curfman McInnes if (b->nonew == 1) goto b_noinsert; \ 163e32f2f54SBarry Smith if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \ 164fef13f97SBarry Smith MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \ 165ac7a638eSSatish Balay N = nrow++ - 1; \ 166ac7a638eSSatish Balay /* shift up all the later entries in this row */ \ 167ac7a638eSSatish Balay for (ii=N; ii>=_i; ii--) { \ 168ac7a638eSSatish Balay rp[ii+1] = rp[ii]; \ 1693eda8832SBarry Smith ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \ 170ac7a638eSSatish Balay } \ 1713eda8832SBarry Smith if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \ 172ac7a638eSSatish Balay rp[_i] = bcol; \ 173ac7a638eSSatish Balay ap[bs2*_i + bs*cidx + ridx] = value; \ 174ac7a638eSSatish Balay b_noinsert:; \ 175ac7a638eSSatish Balay bilen[brow] = nrow; \ 176ac7a638eSSatish Balay } 177ac7a638eSSatish Balay 1784a2ae208SSatish Balay #undef __FUNCT__ 1794a2ae208SSatish Balay #define __FUNCT__ "MatSetValues_MPIBAIJ" 180b24ad042SBarry Smith PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const 
PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 18157b952d6SSatish Balay { 18257b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 18393fea6afSBarry Smith MatScalar value; 184ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 185dfbe8321SBarry Smith PetscErrorCode ierr; 186b24ad042SBarry Smith PetscInt i,j,row,col; 187d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 188d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,cstart_orig=mat->cmap->rstart; 189d0f46423SBarry Smith PetscInt cend_orig =mat->cmap->rend,bs=mat->rmap->bs; 19057b952d6SSatish Balay 191eada6651SSatish Balay /* Some Variables required in the macro */ 19280c1aa95SSatish Balay Mat A = baij->A; 19380c1aa95SSatish Balay Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data; 194b24ad042SBarry Smith PetscInt *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j; 1953eda8832SBarry Smith MatScalar *aa =a->a; 196ac7a638eSSatish Balay 197ac7a638eSSatish Balay Mat B = baij->B; 198ac7a638eSSatish Balay Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data; 199b24ad042SBarry Smith PetscInt *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j; 2003eda8832SBarry Smith MatScalar *ba =b->a; 201ac7a638eSSatish Balay 202b24ad042SBarry Smith PetscInt *rp,ii,nrow,_i,rmax,N,brow,bcol; 203b24ad042SBarry Smith PetscInt low,high,t,ridx,cidx,bs2=a->bs2; 2043eda8832SBarry Smith MatScalar *ap,*bap; 20580c1aa95SSatish Balay 206d64ed03dSBarry Smith PetscFunctionBegin; 20757b952d6SSatish Balay for (i=0; i<m; i++) { 2085ef9f2a5SBarry Smith if (im[i] < 0) continue; 2092515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 210e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 211639f9d9dSBarry Smith #endif 21257b952d6SSatish Balay if (im[i] >= rstart_orig && im[i] < rend_orig) { 21357b952d6SSatish Balay row = im[i] - rstart_orig; 21457b952d6SSatish Balay for (j=0; j<n; j++) { 21557b952d6SSatish 
Balay if (in[j] >= cstart_orig && in[j] < cend_orig) { 21657b952d6SSatish Balay col = in[j] - cstart_orig; 217db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 218db4deed7SKarl Rupp else value = v[i+j*m]; 219f5e9677aSSatish Balay MatSetValues_SeqBAIJ_A_Private(row,col,value,addv); 22080c1aa95SSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 22173959e64SBarry Smith } else if (in[j] < 0) continue; 2222515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 223660746e0SBarry Smith else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 224639f9d9dSBarry Smith #endif 22557b952d6SSatish Balay else { 22657b952d6SSatish Balay if (mat->was_assembled) { 227905e6a2fSBarry Smith if (!baij->colmap) { 228ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 229905e6a2fSBarry Smith } 230aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 2310f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr); 232bba1ac68SSatish Balay col = col - 1; 23348e59246SSatish Balay #else 234bba1ac68SSatish Balay col = baij->colmap[in[j]/bs] - 1; 23548e59246SSatish Balay #endif 236c9ef50b2SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 237ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 2388295de27SSatish Balay col = in[j]; 2399bf004c3SSatish Balay /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 2409bf004c3SSatish Balay B = baij->B; 2419bf004c3SSatish Balay b = (Mat_SeqBAIJ*)(B)->data; 2429bf004c3SSatish Balay bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j; 2439bf004c3SSatish Balay ba =b->a; 244c9ef50b2SBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]); 245c9ef50b2SBarry Smith else col += in[j]%bs; 2468295de27SSatish Balay } else col = in[j]; 
247db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 248db4deed7SKarl Rupp else value = v[i+j*m]; 24990da58bdSSatish Balay MatSetValues_SeqBAIJ_B_Private(row,col,value,addv); 25090da58bdSSatish Balay /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */ 25157b952d6SSatish Balay } 25257b952d6SSatish Balay } 253d64ed03dSBarry Smith } else { 2544cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 25590f02eecSBarry Smith if (!baij->donotstash) { 2565080c13bSMatthew G Knepley mat->assembled = PETSC_FALSE; 257ff2fd236SBarry Smith if (roworiented) { 258b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 259ff2fd236SBarry Smith } else { 260b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 26157b952d6SSatish Balay } 26257b952d6SSatish Balay } 26357b952d6SSatish Balay } 26490f02eecSBarry Smith } 2653a40ed3dSBarry Smith PetscFunctionReturn(0); 26657b952d6SSatish Balay } 26757b952d6SSatish Balay 2684a2ae208SSatish Balay #undef __FUNCT__ 26997e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ" 27097e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 271ab26458aSBarry Smith { 272ab26458aSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 273dd6ea824SBarry Smith const PetscScalar *value; 274f15d580aSBarry Smith MatScalar *barray = baij->barray; 275ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 276dfbe8321SBarry Smith PetscErrorCode ierr; 277899cda47SBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 278899cda47SBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 279d0f46423SBarry Smith PetscInt 
cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 280ab26458aSBarry Smith 281b16ae2b1SBarry Smith PetscFunctionBegin; 28230793edcSSatish Balay if (!barray) { 283785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 28482502324SSatish Balay baij->barray = barray; 28530793edcSSatish Balay } 28630793edcSSatish Balay 28726fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 28826fbe8dcSKarl Rupp else stepval = (m-1)*bs; 28926fbe8dcSKarl Rupp 290ab26458aSBarry Smith for (i=0; i<m; i++) { 2915ef9f2a5SBarry Smith if (im[i] < 0) continue; 2922515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 293e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 294ab26458aSBarry Smith #endif 295ab26458aSBarry Smith if (im[i] >= rstart && im[i] < rend) { 296ab26458aSBarry Smith row = im[i] - rstart; 297ab26458aSBarry Smith for (j=0; j<n; j++) { 29815b57d14SSatish Balay /* If NumCol = 1 then a copy is not required */ 29915b57d14SSatish Balay if ((roworiented) && (n == 1)) { 300f15d580aSBarry Smith barray = (MatScalar*)v + i*bs2; 30115b57d14SSatish Balay } else if ((!roworiented) && (m == 1)) { 302f15d580aSBarry Smith barray = (MatScalar*)v + j*bs2; 30315b57d14SSatish Balay } else { /* Here a copy is required */ 304ab26458aSBarry Smith if (roworiented) { 30553ef36baSBarry Smith value = v + (i*(stepval+bs) + j)*bs; 306ab26458aSBarry Smith } else { 30753ef36baSBarry Smith value = v + (j*(stepval+bs) + i)*bs; 308abef11f7SSatish Balay } 30953ef36baSBarry Smith for (ii=0; ii<bs; ii++,value+=bs+stepval) { 31026fbe8dcSKarl Rupp for (jj=0; jj<bs; jj++) barray[jj] = value[jj]; 31153ef36baSBarry Smith barray += bs; 31247513183SBarry Smith } 31330793edcSSatish Balay barray -= bs2; 31415b57d14SSatish Balay } 315abef11f7SSatish Balay 316abef11f7SSatish Balay if (in[j] >= cstart && in[j] < cend) { 317abef11f7SSatish Balay col = in[j] - cstart; 31897e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 31926fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 3202515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 321cb9801acSJed Brown else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 322ab26458aSBarry Smith #endif 323ab26458aSBarry Smith else { 324ab26458aSBarry Smith if (mat->was_assembled) { 325ab26458aSBarry Smith if (!baij->colmap) { 326ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 327ab26458aSBarry Smith } 328a5eb4965SSatish Balay 3292515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 330aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 331b24ad042SBarry Smith { PetscInt data; 3320f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 333e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 334fa46199cSSatish Balay } 33548e59246SSatish Balay #else 336e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 337a5eb4965SSatish Balay #endif 33848e59246SSatish Balay #endif 339aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 3400f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 341fa46199cSSatish Balay col = (col - 1)/bs; 34248e59246SSatish Balay #else 343a5eb4965SSatish Balay col = (baij->colmap[in[j]] - 1)/bs; 34448e59246SSatish Balay #endif 3450e9bae81SBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) { 346ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 347ab26458aSBarry Smith col = in[j]; 3480e9bae81SBarry Smith } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]); 349db4deed7SKarl Rupp } else col = in[j]; 35097e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 351ab26458aSBarry Smith } 352ab26458aSBarry Smith } 353d64ed03dSBarry Smith } else { 3544cb17eb5SBarry Smith if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 355ab26458aSBarry Smith if (!baij->donotstash) { 356ff2fd236SBarry Smith if (roworiented) { 3576fa18ffdSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 358ff2fd236SBarry Smith } else { 3596fa18ffdSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 360ff2fd236SBarry Smith } 361abef11f7SSatish Balay } 362ab26458aSBarry Smith } 363ab26458aSBarry Smith } 3643a40ed3dSBarry Smith PetscFunctionReturn(0); 365ab26458aSBarry Smith } 3666fa18ffdSBarry Smith 3670bdbc534SSatish Balay #define HASH_KEY 0.6180339887 368b24ad042SBarry Smith #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp))) 369b24ad042SBarry Smith /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 370b24ad042SBarry Smith /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 3714a2ae208SSatish Balay #undef __FUNCT__ 37297e5c40aSBarry Smith #define __FUNCT__ "MatSetValues_MPIBAIJ_HT" 37397e5c40aSBarry Smith PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 3740bdbc534SSatish Balay { 3750bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 376ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 377dfbe8321SBarry Smith PetscErrorCode ierr; 378b24ad042SBarry Smith PetscInt i,j,row,col; 379d0f46423SBarry Smith PetscInt rstart_orig=mat->rmap->rstart; 380d0f46423SBarry Smith PetscInt rend_orig =mat->rmap->rend,Nbs=baij->Nbs; 381d0f46423SBarry Smith PetscInt 
h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx; 382329f5518SBarry Smith PetscReal tmp; 3833eda8832SBarry Smith MatScalar **HD = baij->hd,value; 3842515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 385b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 3864a15367fSSatish Balay #endif 3870bdbc534SSatish Balay 3880bdbc534SSatish Balay PetscFunctionBegin; 3890bdbc534SSatish Balay for (i=0; i<m; i++) { 3902515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 391e32f2f54SBarry Smith if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); 392e32f2f54SBarry Smith if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 3930bdbc534SSatish Balay #endif 3940bdbc534SSatish Balay row = im[i]; 395c2760754SSatish Balay if (row >= rstart_orig && row < rend_orig) { 3960bdbc534SSatish Balay for (j=0; j<n; j++) { 3970bdbc534SSatish Balay col = in[j]; 398db4deed7SKarl Rupp if (roworiented) value = v[i*n+j]; 399db4deed7SKarl Rupp else value = v[i+j*m]; 400b24ad042SBarry Smith /* Look up PetscInto the Hash Table */ 401c2760754SSatish Balay key = (row/bs)*Nbs+(col/bs)+1; 402c2760754SSatish Balay h1 = HASH(size,key,tmp); 4030bdbc534SSatish Balay 404c2760754SSatish Balay 405c2760754SSatish Balay idx = h1; 4062515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 407187ce0cbSSatish Balay insert_ct++; 408187ce0cbSSatish Balay total_ct++; 409187ce0cbSSatish Balay if (HT[idx] != key) { 410187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 411187ce0cbSSatish Balay if (idx == size) { 412187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 413f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 414187ce0cbSSatish Balay } 415187ce0cbSSatish Balay } 416187ce0cbSSatish Balay #else 417c2760754SSatish Balay if (HT[idx] 
!= key) { 418c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 419c2760754SSatish Balay if (idx == size) { 420c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 421f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 422c2760754SSatish Balay } 423c2760754SSatish Balay } 424187ce0cbSSatish Balay #endif 425c2760754SSatish Balay /* A HASH table entry is found, so insert the values at the correct address */ 426c2760754SSatish Balay if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value; 427c2760754SSatish Balay else *(HD[idx]+ (col % bs)*bs + (row % bs)) = value; 4280bdbc534SSatish Balay } 42926fbe8dcSKarl Rupp } else if (!baij->donotstash) { 430ff2fd236SBarry Smith if (roworiented) { 431b400d20cSBarry Smith ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 432ff2fd236SBarry Smith } else { 433b400d20cSBarry Smith ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 4340bdbc534SSatish Balay } 4350bdbc534SSatish Balay } 4360bdbc534SSatish Balay } 4372515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 438187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 439187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 440187ce0cbSSatish Balay #endif 4410bdbc534SSatish Balay PetscFunctionReturn(0); 4420bdbc534SSatish Balay } 4430bdbc534SSatish Balay 4444a2ae208SSatish Balay #undef __FUNCT__ 44597e5c40aSBarry Smith #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT" 44697e5c40aSBarry Smith PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 4470bdbc534SSatish Balay { 4480bdbc534SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 449ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 450dfbe8321SBarry Smith PetscErrorCode ierr; 451b24ad042SBarry 
Smith PetscInt i,j,ii,jj,row,col; 452899cda47SBarry Smith PetscInt rstart=baij->rstartbs; 453d0f46423SBarry Smith PetscInt rend =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2; 454b24ad042SBarry Smith PetscInt h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs; 455329f5518SBarry Smith PetscReal tmp; 4563eda8832SBarry Smith MatScalar **HD = baij->hd,*baij_a; 457dd6ea824SBarry Smith const PetscScalar *v_t,*value; 4582515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 459b24ad042SBarry Smith PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct; 4604a15367fSSatish Balay #endif 4610bdbc534SSatish Balay 462d0a41580SSatish Balay PetscFunctionBegin; 46326fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 46426fbe8dcSKarl Rupp else stepval = (m-1)*bs; 46526fbe8dcSKarl Rupp 4660bdbc534SSatish Balay for (i=0; i<m; i++) { 4672515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 468e32f2f54SBarry Smith if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]); 469e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1); 4700bdbc534SSatish Balay #endif 4710bdbc534SSatish Balay row = im[i]; 472ab715e2cSSatish Balay v_t = v + i*nbs2; 473c2760754SSatish Balay if (row >= rstart && row < rend) { 4740bdbc534SSatish Balay for (j=0; j<n; j++) { 4750bdbc534SSatish Balay col = in[j]; 4760bdbc534SSatish Balay 4770bdbc534SSatish Balay /* Look up into the Hash Table */ 478c2760754SSatish Balay key = row*Nbs+col+1; 479c2760754SSatish Balay h1 = HASH(size,key,tmp); 4800bdbc534SSatish Balay 481c2760754SSatish Balay idx = h1; 4822515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 483187ce0cbSSatish Balay total_ct++; 484187ce0cbSSatish Balay insert_ct++; 485187ce0cbSSatish Balay if (HT[idx] != key) { 486187ce0cbSSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ; 487187ce0cbSSatish Balay if (idx == size) { 
488187ce0cbSSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ; 489f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 490187ce0cbSSatish Balay } 491187ce0cbSSatish Balay } 492187ce0cbSSatish Balay #else 493c2760754SSatish Balay if (HT[idx] != key) { 494c2760754SSatish Balay for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ; 495c2760754SSatish Balay if (idx == size) { 496c2760754SSatish Balay for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ; 497f23aa3ddSBarry Smith if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col); 498c2760754SSatish Balay } 499c2760754SSatish Balay } 500187ce0cbSSatish Balay #endif 501c2760754SSatish Balay baij_a = HD[idx]; 5020bdbc534SSatish Balay if (roworiented) { 503c2760754SSatish Balay /*value = v + i*(stepval+bs)*bs + j*bs;*/ 504187ce0cbSSatish Balay /* value = v + (i*(stepval+bs)+j)*bs; */ 505187ce0cbSSatish Balay value = v_t; 506187ce0cbSSatish Balay v_t += bs; 507fef45726SSatish Balay if (addv == ADD_VALUES) { 508c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 509c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 510fef45726SSatish Balay baij_a[jj] += *value++; 511b4cc0f5aSSatish Balay } 512b4cc0f5aSSatish Balay } 513fef45726SSatish Balay } else { 514c2760754SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval) { 515c2760754SSatish Balay for (jj=ii; jj<bs2; jj+=bs) { 516fef45726SSatish Balay baij_a[jj] = *value++; 517fef45726SSatish Balay } 518fef45726SSatish Balay } 519fef45726SSatish Balay } 5200bdbc534SSatish Balay } else { 5210bdbc534SSatish Balay value = v + j*(stepval+bs)*bs + i*bs; 522fef45726SSatish Balay if (addv == ADD_VALUES) { 523b4cc0f5aSSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 5240bdbc534SSatish Balay for (jj=0; jj<bs; jj++) { 525fef45726SSatish Balay baij_a[jj] += *value++; 526fef45726SSatish Balay } 
527fef45726SSatish Balay } 528fef45726SSatish Balay } else { 529fef45726SSatish Balay for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) { 530fef45726SSatish Balay for (jj=0; jj<bs; jj++) { 531fef45726SSatish Balay baij_a[jj] = *value++; 532fef45726SSatish Balay } 533b4cc0f5aSSatish Balay } 5340bdbc534SSatish Balay } 5350bdbc534SSatish Balay } 5360bdbc534SSatish Balay } 5370bdbc534SSatish Balay } else { 5380bdbc534SSatish Balay if (!baij->donotstash) { 5390bdbc534SSatish Balay if (roworiented) { 5408798bf22SSatish Balay ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5410bdbc534SSatish Balay } else { 5428798bf22SSatish Balay ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 5430bdbc534SSatish Balay } 5440bdbc534SSatish Balay } 5450bdbc534SSatish Balay } 5460bdbc534SSatish Balay } 5472515c552SBarry Smith #if defined(PETSC_USE_DEBUG) 548187ce0cbSSatish Balay baij->ht_total_ct = total_ct; 549187ce0cbSSatish Balay baij->ht_insert_ct = insert_ct; 550187ce0cbSSatish Balay #endif 5510bdbc534SSatish Balay PetscFunctionReturn(0); 5520bdbc534SSatish Balay } 553133cdb44SSatish Balay 5544a2ae208SSatish Balay #undef __FUNCT__ 5554a2ae208SSatish Balay #define __FUNCT__ "MatGetValues_MPIBAIJ" 556b24ad042SBarry Smith PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 557d6de1c52SSatish Balay { 558d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 5596849ba73SBarry Smith PetscErrorCode ierr; 560d0f46423SBarry Smith PetscInt bs = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend; 561d0f46423SBarry Smith PetscInt bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data; 562d6de1c52SSatish Balay 563133cdb44SSatish Balay PetscFunctionBegin; 564d6de1c52SSatish Balay for (i=0; i<m; i++) { 565e32f2f54SBarry Smith if (idxm[i] < 0) continue; /* 
SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 566e32f2f54SBarry Smith if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 567d6de1c52SSatish Balay if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 568d6de1c52SSatish Balay row = idxm[i] - bsrstart; 569d6de1c52SSatish Balay for (j=0; j<n; j++) { 570e32f2f54SBarry Smith if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 571e32f2f54SBarry Smith if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 572d6de1c52SSatish Balay if (idxn[j] >= bscstart && idxn[j] < bscend) { 573d6de1c52SSatish Balay col = idxn[j] - bscstart; 57498dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 575d64ed03dSBarry Smith } else { 576905e6a2fSBarry Smith if (!baij->colmap) { 577ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 578905e6a2fSBarry Smith } 579aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 5800f5bd95cSBarry Smith ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr); 581fa46199cSSatish Balay data--; 58248e59246SSatish Balay #else 58348e59246SSatish Balay data = baij->colmap[idxn[j]/bs]-1; 58448e59246SSatish Balay #endif 58548e59246SSatish Balay if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0; 586d9d09a02SSatish Balay else { 58748e59246SSatish Balay col = data + idxn[j]%bs; 58898dd23e9SBarry Smith ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 589d6de1c52SSatish Balay } 590d6de1c52SSatish Balay } 591d6de1c52SSatish Balay } 592f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 593d6de1c52SSatish Balay } 5943a40ed3dSBarry Smith PetscFunctionReturn(0); 595d6de1c52SSatish Balay } 
596d6de1c52SSatish Balay 5974a2ae208SSatish Balay #undef __FUNCT__ 5984a2ae208SSatish Balay #define __FUNCT__ "MatNorm_MPIBAIJ" 599dfbe8321SBarry Smith PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm) 600d6de1c52SSatish Balay { 601d6de1c52SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 602d6de1c52SSatish Balay Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data; 603dfbe8321SBarry Smith PetscErrorCode ierr; 604d0f46423SBarry Smith PetscInt i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col; 605329f5518SBarry Smith PetscReal sum = 0.0; 6063eda8832SBarry Smith MatScalar *v; 607d6de1c52SSatish Balay 608d64ed03dSBarry Smith PetscFunctionBegin; 609d6de1c52SSatish Balay if (baij->size == 1) { 610064f8208SBarry Smith ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr); 611d6de1c52SSatish Balay } else { 612d6de1c52SSatish Balay if (type == NORM_FROBENIUS) { 613d6de1c52SSatish Balay v = amat->a; 6148a62d963SHong Zhang nz = amat->nz*bs2; 6158a62d963SHong Zhang for (i=0; i<nz; i++) { 616329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 617d6de1c52SSatish Balay } 618d6de1c52SSatish Balay v = bmat->a; 6198a62d963SHong Zhang nz = bmat->nz*bs2; 6208a62d963SHong Zhang for (i=0; i<nz; i++) { 621329f5518SBarry Smith sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 622d6de1c52SSatish Balay } 623ce94432eSBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6248f1a2a5eSBarry Smith *nrm = PetscSqrtReal(*nrm); 6258a62d963SHong Zhang } else if (type == NORM_1) { /* max column sum */ 6268a62d963SHong Zhang PetscReal *tmp,*tmp2; 627899cda47SBarry Smith PetscInt *jj,*garray=baij->garray,cstart=baij->rstartbs; 628dcca6d9dSJed Brown ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr); 629d0f46423SBarry Smith ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr); 6308a62d963SHong Zhang v = amat->a; jj = amat->j; 6318a62d963SHong 
Zhang for (i=0; i<amat->nz; i++) { 6328a62d963SHong Zhang for (j=0; j<bs; j++) { 6338a62d963SHong Zhang col = bs*(cstart + *jj) + j; /* column index */ 6348a62d963SHong Zhang for (row=0; row<bs; row++) { 6358a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6368a62d963SHong Zhang } 6378a62d963SHong Zhang } 6388a62d963SHong Zhang jj++; 6398a62d963SHong Zhang } 6408a62d963SHong Zhang v = bmat->a; jj = bmat->j; 6418a62d963SHong Zhang for (i=0; i<bmat->nz; i++) { 6428a62d963SHong Zhang for (j=0; j<bs; j++) { 6438a62d963SHong Zhang col = bs*garray[*jj] + j; 6448a62d963SHong Zhang for (row=0; row<bs; row++) { 6458a62d963SHong Zhang tmp[col] += PetscAbsScalar(*v); v++; 6468a62d963SHong Zhang } 6478a62d963SHong Zhang } 6488a62d963SHong Zhang jj++; 6498a62d963SHong Zhang } 650ce94432eSBarry Smith ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6518a62d963SHong Zhang *nrm = 0.0; 652d0f46423SBarry Smith for (j=0; j<mat->cmap->N; j++) { 6538a62d963SHong Zhang if (tmp2[j] > *nrm) *nrm = tmp2[j]; 6548a62d963SHong Zhang } 655fca92195SBarry Smith ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr); 6568a62d963SHong Zhang } else if (type == NORM_INFINITY) { /* max row sum */ 657577dd1f9SKris Buschelman PetscReal *sums; 658785e854fSJed Brown ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr); 6598a62d963SHong Zhang sum = 0.0; 6608a62d963SHong Zhang for (j=0; j<amat->mbs; j++) { 6618a62d963SHong Zhang for (row=0; row<bs; row++) sums[row] = 0.0; 6628a62d963SHong Zhang v = amat->a + bs2*amat->i[j]; 6638a62d963SHong Zhang nz = amat->i[j+1]-amat->i[j]; 6648a62d963SHong Zhang for (i=0; i<nz; i++) { 6658a62d963SHong Zhang for (col=0; col<bs; col++) { 6668a62d963SHong Zhang for (row=0; row<bs; row++) { 6678a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 6688a62d963SHong Zhang } 6698a62d963SHong Zhang } 6708a62d963SHong Zhang } 6718a62d963SHong Zhang v = bmat->a + bs2*bmat->i[j]; 6728a62d963SHong Zhang nz = 
bmat->i[j+1]-bmat->i[j]; 6738a62d963SHong Zhang for (i=0; i<nz; i++) { 6748a62d963SHong Zhang for (col=0; col<bs; col++) { 6758a62d963SHong Zhang for (row=0; row<bs; row++) { 6768a62d963SHong Zhang sums[row] += PetscAbsScalar(*v); v++; 6778a62d963SHong Zhang } 6788a62d963SHong Zhang } 6798a62d963SHong Zhang } 6808a62d963SHong Zhang for (row=0; row<bs; row++) { 6818a62d963SHong Zhang if (sums[row] > sum) sum = sums[row]; 6828a62d963SHong Zhang } 6838a62d963SHong Zhang } 684ce94432eSBarry Smith ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 685577dd1f9SKris Buschelman ierr = PetscFree(sums);CHKERRQ(ierr); 686ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet"); 687d64ed03dSBarry Smith } 6883a40ed3dSBarry Smith PetscFunctionReturn(0); 689d6de1c52SSatish Balay } 69057b952d6SSatish Balay 691fef45726SSatish Balay /* 692fef45726SSatish Balay Creates the hash table, and sets the table 693fef45726SSatish Balay This table is created only once. 694fef45726SSatish Balay If new entried need to be added to the matrix 695fef45726SSatish Balay then the hash table has to be destroyed and 696fef45726SSatish Balay recreated. 
697fef45726SSatish Balay */ 6984a2ae208SSatish Balay #undef __FUNCT__ 6994a2ae208SSatish Balay #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private" 700dfbe8321SBarry Smith PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor) 701596b8d2eSBarry Smith { 702596b8d2eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 703596b8d2eSBarry Smith Mat A = baij->A,B=baij->B; 704596b8d2eSBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data; 705b24ad042SBarry Smith PetscInt i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j; 7066849ba73SBarry Smith PetscErrorCode ierr; 707fca92195SBarry Smith PetscInt ht_size,bs2=baij->bs2,rstart=baij->rstartbs; 708899cda47SBarry Smith PetscInt cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs; 709b24ad042SBarry Smith PetscInt *HT,key; 7103eda8832SBarry Smith MatScalar **HD; 711329f5518SBarry Smith PetscReal tmp; 7126cf91177SBarry Smith #if defined(PETSC_USE_INFO) 713b24ad042SBarry Smith PetscInt ct=0,max=0; 7144a15367fSSatish Balay #endif 715fef45726SSatish Balay 716d64ed03dSBarry Smith PetscFunctionBegin; 717fca92195SBarry Smith if (baij->ht) PetscFunctionReturn(0); 718fef45726SSatish Balay 719fca92195SBarry Smith baij->ht_size = (PetscInt)(factor*nz); 720fca92195SBarry Smith ht_size = baij->ht_size; 7210bdbc534SSatish Balay 722fef45726SSatish Balay /* Allocate Memory for Hash Table */ 7231795a4d1SJed Brown ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr); 724b9e4cc15SSatish Balay HD = baij->hd; 725a07cd24cSSatish Balay HT = baij->ht; 726b9e4cc15SSatish Balay 727596b8d2eSBarry Smith /* Loop Over A */ 7280bdbc534SSatish Balay for (i=0; i<a->mbs; i++) { 729596b8d2eSBarry Smith for (j=ai[i]; j<ai[i+1]; j++) { 7300bdbc534SSatish Balay row = i+rstart; 7310bdbc534SSatish Balay col = aj[j]+cstart; 732596b8d2eSBarry Smith 733187ce0cbSSatish Balay key = row*Nbs + col + 1; 734fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 735fca92195SBarry Smith for 
(k=0; k<ht_size; k++) { 736fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 737fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 738fca92195SBarry Smith HD[(h1+k)%ht_size] = a->a + j*bs2; 739596b8d2eSBarry Smith break; 7406cf91177SBarry Smith #if defined(PETSC_USE_INFO) 741187ce0cbSSatish Balay } else { 742187ce0cbSSatish Balay ct++; 743187ce0cbSSatish Balay #endif 744596b8d2eSBarry Smith } 745187ce0cbSSatish Balay } 7466cf91177SBarry Smith #if defined(PETSC_USE_INFO) 747187ce0cbSSatish Balay if (k> max) max = k; 748187ce0cbSSatish Balay #endif 749596b8d2eSBarry Smith } 750596b8d2eSBarry Smith } 751596b8d2eSBarry Smith /* Loop Over B */ 7520bdbc534SSatish Balay for (i=0; i<b->mbs; i++) { 753596b8d2eSBarry Smith for (j=bi[i]; j<bi[i+1]; j++) { 7540bdbc534SSatish Balay row = i+rstart; 7550bdbc534SSatish Balay col = garray[bj[j]]; 756187ce0cbSSatish Balay key = row*Nbs + col + 1; 757fca92195SBarry Smith h1 = HASH(ht_size,key,tmp); 758fca92195SBarry Smith for (k=0; k<ht_size; k++) { 759fca92195SBarry Smith if (!HT[(h1+k)%ht_size]) { 760fca92195SBarry Smith HT[(h1+k)%ht_size] = key; 761fca92195SBarry Smith HD[(h1+k)%ht_size] = b->a + j*bs2; 762596b8d2eSBarry Smith break; 7636cf91177SBarry Smith #if defined(PETSC_USE_INFO) 764187ce0cbSSatish Balay } else { 765187ce0cbSSatish Balay ct++; 766187ce0cbSSatish Balay #endif 767596b8d2eSBarry Smith } 768187ce0cbSSatish Balay } 7696cf91177SBarry Smith #if defined(PETSC_USE_INFO) 770187ce0cbSSatish Balay if (k> max) max = k; 771187ce0cbSSatish Balay #endif 772596b8d2eSBarry Smith } 773596b8d2eSBarry Smith } 774596b8d2eSBarry Smith 775596b8d2eSBarry Smith /* Print Summary */ 7766cf91177SBarry Smith #if defined(PETSC_USE_INFO) 777fca92195SBarry Smith for (i=0,j=0; i<ht_size; i++) { 77826fbe8dcSKarl Rupp if (HT[i]) j++; 779c38d4ed2SBarry Smith } 7801e2582c4SBarry Smith ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 
0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr); 781187ce0cbSSatish Balay #endif 7823a40ed3dSBarry Smith PetscFunctionReturn(0); 783596b8d2eSBarry Smith } 78457b952d6SSatish Balay 7854a2ae208SSatish Balay #undef __FUNCT__ 7864a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ" 787dfbe8321SBarry Smith PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode) 788bbb85fb3SSatish Balay { 789bbb85fb3SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 790dfbe8321SBarry Smith PetscErrorCode ierr; 791b24ad042SBarry Smith PetscInt nstash,reallocs; 792bbb85fb3SSatish Balay InsertMode addv; 793bbb85fb3SSatish Balay 794bbb85fb3SSatish Balay PetscFunctionBegin; 79526fbe8dcSKarl Rupp if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 796bbb85fb3SSatish Balay 797bbb85fb3SSatish Balay /* make sure all processors are either in INSERTMODE or ADDMODE */ 798ce94432eSBarry Smith ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 799ce94432eSBarry Smith if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added"); 800bbb85fb3SSatish Balay mat->insertmode = addv; /* in case this processor had no cache */ 801bbb85fb3SSatish Balay 802d0f46423SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 8031e2582c4SBarry Smith ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr); 8048798bf22SSatish Balay ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 8051e2582c4SBarry Smith ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 80646680499SSatish Balay ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr); 8071e2582c4SBarry Smith ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D 
mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 808bbb85fb3SSatish Balay PetscFunctionReturn(0); 809bbb85fb3SSatish Balay } 810bbb85fb3SSatish Balay 8114a2ae208SSatish Balay #undef __FUNCT__ 8124a2ae208SSatish Balay #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ" 813dfbe8321SBarry Smith PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode) 814bbb85fb3SSatish Balay { 815bbb85fb3SSatish Balay Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data; 81691c97fd4SSatish Balay Mat_SeqBAIJ *a =(Mat_SeqBAIJ*)baij->A->data; 8176849ba73SBarry Smith PetscErrorCode ierr; 818b24ad042SBarry Smith PetscInt i,j,rstart,ncols,flg,bs2=baij->bs2; 819e44c0bd4SBarry Smith PetscInt *row,*col; 820ace3abfcSBarry Smith PetscBool r1,r2,r3,other_disassembled; 8213eda8832SBarry Smith MatScalar *val; 822bbb85fb3SSatish Balay InsertMode addv = mat->insertmode; 823b24ad042SBarry Smith PetscMPIInt n; 824bbb85fb3SSatish Balay 825bbb85fb3SSatish Balay PetscFunctionBegin; 8265fd66863SKarl Rupp /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 8274cb17eb5SBarry Smith if (!baij->donotstash && !mat->nooffprocentries) { 828a2d1c673SSatish Balay while (1) { 8298798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 830a2d1c673SSatish Balay if (!flg) break; 831a2d1c673SSatish Balay 832bbb85fb3SSatish Balay for (i=0; i<n;) { 833bbb85fb3SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 83426fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 83526fbe8dcSKarl Rupp if (row[j] != rstart) break; 83626fbe8dcSKarl Rupp } 837bbb85fb3SSatish Balay if (j < n) ncols = j-i; 838bbb85fb3SSatish Balay else ncols = n-i; 839bbb85fb3SSatish Balay /* Now assemble all these values with a single function call */ 84097e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr); 841bbb85fb3SSatish Balay i = j; 842bbb85fb3SSatish Balay } 843bbb85fb3SSatish Balay } 8448798bf22SSatish 
Balay ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 845a2d1c673SSatish Balay /* Now process the block-stash. Since the values are stashed column-oriented, 846a2d1c673SSatish Balay set the roworiented flag to column oriented, and after MatSetValues() 847a2d1c673SSatish Balay restore the original flags */ 848a2d1c673SSatish Balay r1 = baij->roworiented; 849a2d1c673SSatish Balay r2 = a->roworiented; 85091c97fd4SSatish Balay r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented; 85126fbe8dcSKarl Rupp 8527c922b88SBarry Smith baij->roworiented = PETSC_FALSE; 8537c922b88SBarry Smith a->roworiented = PETSC_FALSE; 85426fbe8dcSKarl Rupp 85591c97fd4SSatish Balay (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 856a2d1c673SSatish Balay while (1) { 8578798bf22SSatish Balay ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 858a2d1c673SSatish Balay if (!flg) break; 859a2d1c673SSatish Balay 860a2d1c673SSatish Balay for (i=0; i<n;) { 861a2d1c673SSatish Balay /* Now identify the consecutive vals belonging to the same row */ 86226fbe8dcSKarl Rupp for (j=i,rstart=row[j]; j<n; j++) { 86326fbe8dcSKarl Rupp if (row[j] != rstart) break; 86426fbe8dcSKarl Rupp } 865a2d1c673SSatish Balay if (j < n) ncols = j-i; 866a2d1c673SSatish Balay else ncols = n-i; 86797e5c40aSBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr); 868a2d1c673SSatish Balay i = j; 869a2d1c673SSatish Balay } 870a2d1c673SSatish Balay } 8718798bf22SSatish Balay ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr); 87226fbe8dcSKarl Rupp 873a2d1c673SSatish Balay baij->roworiented = r1; 874a2d1c673SSatish Balay a->roworiented = r2; 87526fbe8dcSKarl Rupp 87691c97fd4SSatish Balay ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */ 877bbb85fb3SSatish Balay } 878bbb85fb3SSatish Balay 879bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr); 880bbb85fb3SSatish 
Balay ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr); 881bbb85fb3SSatish Balay 882bbb85fb3SSatish Balay /* determine if any processor has disassembled, if so we must 883bbb85fb3SSatish Balay also disassemble ourselfs, in order that we may reassemble. */ 884bbb85fb3SSatish Balay /* 885bbb85fb3SSatish Balay if nonzero structure of submatrix B cannot change then we know that 886bbb85fb3SSatish Balay no processor disassembled thus we can skip this stuff 887bbb85fb3SSatish Balay */ 888bbb85fb3SSatish Balay if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) { 889ce94432eSBarry Smith ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 890bbb85fb3SSatish Balay if (mat->was_assembled && !other_disassembled) { 891ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 892bbb85fb3SSatish Balay } 893bbb85fb3SSatish Balay } 894bbb85fb3SSatish Balay 895bbb85fb3SSatish Balay if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 896bbb85fb3SSatish Balay ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr); 897bbb85fb3SSatish Balay } 898bbb85fb3SSatish Balay ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr); 899bbb85fb3SSatish Balay ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr); 900bbb85fb3SSatish Balay 9016cf91177SBarry Smith #if defined(PETSC_USE_INFO) 902bbb85fb3SSatish Balay if (baij->ht && mode== MAT_FINAL_ASSEMBLY) { 9031e2582c4SBarry Smith ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr); 90426fbe8dcSKarl Rupp 905bbb85fb3SSatish Balay baij->ht_total_ct = 0; 906bbb85fb3SSatish Balay baij->ht_insert_ct = 0; 907bbb85fb3SSatish Balay } 908bbb85fb3SSatish Balay #endif 909bbb85fb3SSatish Balay if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 910bbb85fb3SSatish Balay ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr); 91126fbe8dcSKarl Rupp 912bbb85fb3SSatish 
Balay mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 913bbb85fb3SSatish Balay mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 914bbb85fb3SSatish Balay } 915bbb85fb3SSatish Balay 916fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 91726fbe8dcSKarl Rupp 918606d414cSSatish Balay baij->rowvalues = 0; 9194f9cfa9eSBarry Smith 9204f9cfa9eSBarry Smith /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 9214f9cfa9eSBarry Smith if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 922e56f5c9eSBarry Smith PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 92309e82e2bSBarry Smith ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 924e56f5c9eSBarry Smith } 925bbb85fb3SSatish Balay PetscFunctionReturn(0); 926bbb85fb3SSatish Balay } 92757b952d6SSatish Balay 9287da1fb6eSBarry Smith extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer); 9299804daf3SBarry Smith #include <petscdraw.h> 9304a2ae208SSatish Balay #undef __FUNCT__ 9314a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket" 9326849ba73SBarry Smith static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 93357b952d6SSatish Balay { 93457b952d6SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 935dfbe8321SBarry Smith PetscErrorCode ierr; 9367da1fb6eSBarry Smith PetscMPIInt rank = baij->rank; 937d0f46423SBarry Smith PetscInt bs = mat->rmap->bs; 938ace3abfcSBarry Smith PetscBool iascii,isdraw; 939b0a32e0cSBarry Smith PetscViewer sviewer; 940f3ef73ceSBarry Smith PetscViewerFormat format; 94157b952d6SSatish Balay 942d64ed03dSBarry Smith PetscFunctionBegin; 943251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 944251f4c67SDmitry Karpeev ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 94532077d6dSBarry Smith if (iascii) { 946b0a32e0cSBarry Smith ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 947456192e2SBarry Smith if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 9484e220ebcSLois Curfman McInnes MatInfo info; 949ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 950d41123aaSBarry Smith ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 9517b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr); 95277431f27SBarry Smith ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n", 95316608c43SJed Brown rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr); 954d132466eSBarry Smith ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 955e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 956d132466eSBarry Smith ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 957e6dd01d4SJed Brown ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 958b0a32e0cSBarry Smith ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 9597b23a99aSBarry Smith ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr); 96007d81ca4SBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 96157b952d6SSatish Balay ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr); 9623a40ed3dSBarry Smith PetscFunctionReturn(0); 963fb9695e5SSatish Balay } else if (format == PETSC_VIEWER_ASCII_INFO) { 96477431f27SBarry Smith ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 9653a40ed3dSBarry Smith PetscFunctionReturn(0); 
96604929863SHong Zhang } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 96704929863SHong Zhang PetscFunctionReturn(0); 96857b952d6SSatish Balay } 96957b952d6SSatish Balay } 97057b952d6SSatish Balay 9710f5bd95cSBarry Smith if (isdraw) { 972b0a32e0cSBarry Smith PetscDraw draw; 973ace3abfcSBarry Smith PetscBool isnull; 974b0a32e0cSBarry Smith ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 975b0a32e0cSBarry Smith ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 97657b952d6SSatish Balay } 97757b952d6SSatish Balay 9787da1fb6eSBarry Smith { 97957b952d6SSatish Balay /* assemble the entire matrix onto first processor. */ 98057b952d6SSatish Balay Mat A; 98157b952d6SSatish Balay Mat_SeqBAIJ *Aloc; 982d0f46423SBarry Smith PetscInt M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs; 9833eda8832SBarry Smith MatScalar *a; 9843e219373SBarry Smith const char *matname; 98557b952d6SSatish Balay 986f204ca49SKris Buschelman /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 987f204ca49SKris Buschelman /* Perhaps this should be the type of mat? 
*/ 988ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 98957b952d6SSatish Balay if (!rank) { 990f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 991d64ed03dSBarry Smith } else { 992f69a0ea3SMatthew Knepley ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 99357b952d6SSatish Balay } 994f204ca49SKris Buschelman ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr); 9950298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr); 9962b82e772SSatish Balay ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 9973bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 99857b952d6SSatish Balay 99957b952d6SSatish Balay /* copy over the A part */ 100057b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 100157b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1002785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 100357b952d6SSatish Balay 100457b952d6SSatish Balay for (i=0; i<mbs; i++) { 1005899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 100626fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 100757b952d6SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1008899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 100957b952d6SSatish Balay for (k=0; k<bs; k++) { 101097e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1011cee3aa6bSSatish Balay col++; a += bs; 101257b952d6SSatish Balay } 101357b952d6SSatish Balay } 101457b952d6SSatish Balay } 101557b952d6SSatish Balay /* copy over the B part */ 101657b952d6SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 101757b952d6SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 101857b952d6SSatish Balay for (i=0; i<mbs; i++) { 1019899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 102026fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 102157b952d6SSatish Balay for 
(j=ai[i]; j<ai[i+1]; j++) { 102257b952d6SSatish Balay col = baij->garray[aj[j]]*bs; 102357b952d6SSatish Balay for (k=0; k<bs; k++) { 102497e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr); 1025cee3aa6bSSatish Balay col++; a += bs; 102657b952d6SSatish Balay } 102757b952d6SSatish Balay } 102857b952d6SSatish Balay } 1029606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 10306d4a8577SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 10316d4a8577SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 103255843e3eSBarry Smith /* 103355843e3eSBarry Smith Everyone has to call to draw the matrix since the graphics waits are 1034b0a32e0cSBarry Smith synchronized across all processors that share the PetscDraw object 103555843e3eSBarry Smith */ 1036b0a32e0cSBarry Smith ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr); 1037ade3a672SBarry Smith ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr); 10383e219373SBarry Smith if (!rank) { 1039ade3a672SBarry Smith ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr); 10407da1fb6eSBarry Smith ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 104157b952d6SSatish Balay } 1042b0a32e0cSBarry Smith ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr); 10436bf464f9SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 104457b952d6SSatish Balay } 10453a40ed3dSBarry Smith PetscFunctionReturn(0); 104657b952d6SSatish Balay } 104757b952d6SSatish Balay 10484a2ae208SSatish Balay #undef __FUNCT__ 1049660746e0SBarry Smith #define __FUNCT__ "MatView_MPIBAIJ_Binary" 1050660746e0SBarry Smith static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer) 1051660746e0SBarry Smith { 1052660746e0SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)mat->data; 1053660746e0SBarry Smith Mat_SeqBAIJ *A = (Mat_SeqBAIJ*)a->A->data; 1054660746e0SBarry Smith Mat_SeqBAIJ *B 
= (Mat_SeqBAIJ*)a->B->data; 1055660746e0SBarry Smith PetscErrorCode ierr; 10565f48b12bSBarry Smith PetscInt i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen; 1057e96a6426SSatish Balay PetscInt *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll; 1058660746e0SBarry Smith int fd; 1059660746e0SBarry Smith PetscScalar *column_values; 1060660746e0SBarry Smith FILE *file; 1061660746e0SBarry Smith PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1062638eb2ebSBarry Smith PetscInt message_count,flowcontrolcount; 1063660746e0SBarry Smith 1064660746e0SBarry Smith PetscFunctionBegin; 1065ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1066ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1067660746e0SBarry Smith nz = bs2*(A->nz + B->nz); 1068660746e0SBarry Smith rlen = mat->rmap->n; 10695872f025SBarry Smith ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1070660746e0SBarry Smith if (!rank) { 1071660746e0SBarry Smith header[0] = MAT_FILE_CLASSID; 1072660746e0SBarry Smith header[1] = mat->rmap->N; 1073660746e0SBarry Smith header[2] = mat->cmap->N; 107426fbe8dcSKarl Rupp 1075ce94432eSBarry Smith ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1076660746e0SBarry Smith ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1077660746e0SBarry Smith /* get largest number of rows any processor has */ 1078660746e0SBarry Smith range = mat->rmap->range; 1079660746e0SBarry Smith for (i=1; i<size; i++) { 1080660746e0SBarry Smith rlen = PetscMax(rlen,range[i+1] - range[i]); 1081660746e0SBarry Smith } 1082660746e0SBarry Smith } else { 1083ce94432eSBarry Smith ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1084660746e0SBarry Smith } 1085660746e0SBarry Smith 1086854ce69bSBarry 
Smith ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr); 1087660746e0SBarry Smith /* compute lengths of each row */ 1088660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1089660746e0SBarry Smith crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1090660746e0SBarry Smith } 1091660746e0SBarry Smith /* store the row lengths to the file */ 1092638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1093660746e0SBarry Smith if (!rank) { 1094660746e0SBarry Smith MPI_Status status; 1095785e854fSJed Brown ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr); 1096660746e0SBarry Smith rlen = (range[1] - range[0])/bs; 1097660746e0SBarry Smith for (i=0; i<rlen; i++) { 1098660746e0SBarry Smith for (j=0; j<bs; j++) { 1099660746e0SBarry Smith row_lens[i*bs+j] = bs*crow_lens[i]; 1100660746e0SBarry Smith } 1101660746e0SBarry Smith } 1102660746e0SBarry Smith ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1103660746e0SBarry Smith for (i=1; i<size; i++) { 1104660746e0SBarry Smith rlen = (range[i+1] - range[i])/bs; 1105639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1106ce94432eSBarry Smith ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1107660746e0SBarry Smith for (k=0; k<rlen; k++) { 1108660746e0SBarry Smith for (j=0; j<bs; j++) { 1109660746e0SBarry Smith row_lens[k*bs+j] = bs*crow_lens[k]; 1110660746e0SBarry Smith } 1111660746e0SBarry Smith } 1112660746e0SBarry Smith ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1113660746e0SBarry Smith } 1114639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1115660746e0SBarry Smith ierr = PetscFree(row_lens);CHKERRQ(ierr); 1116660746e0SBarry Smith } else { 1117639ff905SBarry Smith ierr = 
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1118ce94432eSBarry Smith ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1119639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1120660746e0SBarry Smith } 1121660746e0SBarry Smith ierr = PetscFree(crow_lens);CHKERRQ(ierr); 1122660746e0SBarry Smith 1123660746e0SBarry Smith /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the 1124660746e0SBarry Smith information needed to make it for each row from a block row. This does require more communication but still not more than 1125660746e0SBarry Smith the communication needed for the nonzero values */ 1126660746e0SBarry Smith nzmax = nz; /* space a largest processor needs */ 1127ce94432eSBarry Smith ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1128785e854fSJed Brown ierr = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr); 1129660746e0SBarry Smith cnt = 0; 1130660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1131660746e0SBarry Smith pcnt = cnt; 1132660746e0SBarry Smith for (j=B->i[i]; j<B->i[i+1]; j++) { 1133660746e0SBarry Smith if ((col = garray[B->j[j]]) > cstart) break; 1134660746e0SBarry Smith for (l=0; l<bs; l++) { 1135660746e0SBarry Smith column_indices[cnt++] = bs*col+l; 1136660746e0SBarry Smith } 1137660746e0SBarry Smith } 1138660746e0SBarry Smith for (k=A->i[i]; k<A->i[i+1]; k++) { 1139660746e0SBarry Smith for (l=0; l<bs; l++) { 1140660746e0SBarry Smith column_indices[cnt++] = bs*(A->j[k] + cstart)+l; 1141660746e0SBarry Smith } 1142660746e0SBarry Smith } 1143660746e0SBarry Smith for (; j<B->i[i+1]; j++) { 1144660746e0SBarry Smith for (l=0; l<bs; l++) { 1145660746e0SBarry Smith column_indices[cnt++] = bs*garray[B->j[j]]+l; 1146660746e0SBarry Smith } 1147660746e0SBarry Smith } 1148660746e0SBarry Smith len = cnt - 
pcnt; 1149660746e0SBarry Smith for (k=1; k<bs; k++) { 1150660746e0SBarry Smith ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr); 1151660746e0SBarry Smith cnt += len; 1152660746e0SBarry Smith } 1153660746e0SBarry Smith } 1154660746e0SBarry Smith if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1155660746e0SBarry Smith 1156660746e0SBarry Smith /* store the columns to the file */ 1157638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1158660746e0SBarry Smith if (!rank) { 1159660746e0SBarry Smith MPI_Status status; 1160660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1161660746e0SBarry Smith for (i=1; i<size; i++) { 1162639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1163ce94432eSBarry Smith ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1164ce94432eSBarry Smith ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1165660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1166660746e0SBarry Smith } 1167639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1168660746e0SBarry Smith } else { 1169639ff905SBarry Smith ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1170ce94432eSBarry Smith ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1171ce94432eSBarry Smith ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1172639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1173660746e0SBarry Smith } 1174660746e0SBarry Smith ierr = 
PetscFree(column_indices);CHKERRQ(ierr); 1175660746e0SBarry Smith 1176660746e0SBarry Smith /* load up the numerical values */ 1177785e854fSJed Brown ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr); 1178660746e0SBarry Smith cnt = 0; 1179660746e0SBarry Smith for (i=0; i<a->mbs; i++) { 1180660746e0SBarry Smith rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]); 1181660746e0SBarry Smith for (j=B->i[i]; j<B->i[i+1]; j++) { 1182660746e0SBarry Smith if (garray[B->j[j]] > cstart) break; 1183660746e0SBarry Smith for (l=0; l<bs; l++) { 1184660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1185660746e0SBarry Smith column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1186660746e0SBarry Smith } 1187660746e0SBarry Smith } 1188660746e0SBarry Smith cnt += bs; 1189660746e0SBarry Smith } 1190660746e0SBarry Smith for (k=A->i[i]; k<A->i[i+1]; k++) { 1191660746e0SBarry Smith for (l=0; l<bs; l++) { 1192660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1193660746e0SBarry Smith column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll]; 1194660746e0SBarry Smith } 1195660746e0SBarry Smith } 1196660746e0SBarry Smith cnt += bs; 1197660746e0SBarry Smith } 1198660746e0SBarry Smith for (; j<B->i[i+1]; j++) { 1199660746e0SBarry Smith for (l=0; l<bs; l++) { 1200660746e0SBarry Smith for (ll=0; ll<bs; ll++) { 1201660746e0SBarry Smith column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll]; 1202660746e0SBarry Smith } 1203660746e0SBarry Smith } 1204660746e0SBarry Smith cnt += bs; 1205660746e0SBarry Smith } 1206660746e0SBarry Smith cnt += (bs-1)*rlen; 1207660746e0SBarry Smith } 1208660746e0SBarry Smith if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1209660746e0SBarry Smith 1210660746e0SBarry Smith /* store the column values to the file */ 1211638eb2ebSBarry Smith ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1212660746e0SBarry Smith if (!rank) { 1213660746e0SBarry Smith MPI_Status status; 
1214660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1215660746e0SBarry Smith for (i=1; i<size; i++) { 1216639ff905SBarry Smith ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1217ce94432eSBarry Smith ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1218ce94432eSBarry Smith ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1219660746e0SBarry Smith ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1220660746e0SBarry Smith } 1221639ff905SBarry Smith ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1222660746e0SBarry Smith } else { 1223639ff905SBarry Smith ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1224ce94432eSBarry Smith ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225ce94432eSBarry Smith ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1226639ff905SBarry Smith ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1227660746e0SBarry Smith } 1228660746e0SBarry Smith ierr = PetscFree(column_values);CHKERRQ(ierr); 1229660746e0SBarry Smith 1230660746e0SBarry Smith ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1231660746e0SBarry Smith if (file) { 1232660746e0SBarry Smith fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs); 1233660746e0SBarry Smith } 1234660746e0SBarry Smith PetscFunctionReturn(0); 1235660746e0SBarry Smith } 1236660746e0SBarry Smith 1237660746e0SBarry Smith #undef __FUNCT__ 12384a2ae208SSatish Balay #define __FUNCT__ "MatView_MPIBAIJ" 1239dfbe8321SBarry Smith PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer) 124057b952d6SSatish Balay { 1241dfbe8321SBarry Smith PetscErrorCode ierr; 
1242ace3abfcSBarry Smith PetscBool iascii,isdraw,issocket,isbinary; 124357b952d6SSatish Balay 1244d64ed03dSBarry Smith PetscFunctionBegin; 1245251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1246251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1247251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1248251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1249660746e0SBarry Smith if (iascii || isdraw || issocket) { 12507b2a1423SBarry Smith ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1251660746e0SBarry Smith } else if (isbinary) { 1252660746e0SBarry Smith ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr); 125357b952d6SSatish Balay } 12543a40ed3dSBarry Smith PetscFunctionReturn(0); 125557b952d6SSatish Balay } 125657b952d6SSatish Balay 12574a2ae208SSatish Balay #undef __FUNCT__ 12584a2ae208SSatish Balay #define __FUNCT__ "MatDestroy_MPIBAIJ" 1259dfbe8321SBarry Smith PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 126079bdfe76SSatish Balay { 126179bdfe76SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1262dfbe8321SBarry Smith PetscErrorCode ierr; 126379bdfe76SSatish Balay 1264d64ed03dSBarry Smith PetscFunctionBegin; 1265aa482453SBarry Smith #if defined(PETSC_USE_LOG) 1266d0f46423SBarry Smith PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N); 126779bdfe76SSatish Balay #endif 12688798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 12698798bf22SSatish Balay ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr); 12706bf464f9SBarry Smith ierr = MatDestroy(&baij->A);CHKERRQ(ierr); 12716bf464f9SBarry Smith ierr = MatDestroy(&baij->B);CHKERRQ(ierr); 1272aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 
12736bc0bbbfSBarry Smith ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 127448e59246SSatish Balay #else 127505b42c5fSBarry Smith ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 127648e59246SSatish Balay #endif 127705b42c5fSBarry Smith ierr = PetscFree(baij->garray);CHKERRQ(ierr); 12786bf464f9SBarry Smith ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 12796bf464f9SBarry Smith ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 1280fca92195SBarry Smith ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr); 128105b42c5fSBarry Smith ierr = PetscFree(baij->barray);CHKERRQ(ierr); 1282fca92195SBarry Smith ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr); 1283899cda47SBarry Smith ierr = PetscFree(baij->rangebs);CHKERRQ(ierr); 1284bf0cc555SLisandro Dalcin ierr = PetscFree(mat->data);CHKERRQ(ierr); 1285901853e0SKris Buschelman 1286dbd8c25aSHong Zhang ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1287bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1288bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1289bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr); 1290bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1291bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1292bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1293bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr); 1294bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1295bdf89e91SBarry Smith ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr); 12963a40ed3dSBarry Smith PetscFunctionReturn(0); 129779bdfe76SSatish Balay } 129879bdfe76SSatish Balay 12994a2ae208SSatish Balay #undef __FUNCT__ 13004a2ae208SSatish Balay #define __FUNCT__ "MatMult_MPIBAIJ" 1301dfbe8321SBarry Smith PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy) 1302cee3aa6bSSatish Balay { 1303cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1304dfbe8321SBarry Smith PetscErrorCode ierr; 1305b24ad042SBarry Smith PetscInt nt; 1306cee3aa6bSSatish Balay 1307d64ed03dSBarry Smith PetscFunctionBegin; 1308e1311b90SBarry Smith ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1309e7e72b3dSBarry Smith if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx"); 1310e1311b90SBarry Smith ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr); 1311e7e72b3dSBarry Smith if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy"); 1312ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1313f830108cSBarry Smith ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1314ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1315f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 13163a40ed3dSBarry Smith PetscFunctionReturn(0); 1317cee3aa6bSSatish Balay } 1318cee3aa6bSSatish Balay 13194a2ae208SSatish Balay #undef __FUNCT__ 13204a2ae208SSatish Balay #define __FUNCT__ "MatMultAdd_MPIBAIJ" 1321dfbe8321SBarry Smith PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1322cee3aa6bSSatish Balay { 1323cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1324dfbe8321SBarry Smith PetscErrorCode ierr; 1325d64ed03dSBarry Smith 1326d64ed03dSBarry Smith PetscFunctionBegin; 1327ca9f406cSSatish Balay ierr = 
VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1328f830108cSBarry Smith ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1329ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1330f830108cSBarry Smith ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 13313a40ed3dSBarry Smith PetscFunctionReturn(0); 1332cee3aa6bSSatish Balay } 1333cee3aa6bSSatish Balay 13344a2ae208SSatish Balay #undef __FUNCT__ 13354a2ae208SSatish Balay #define __FUNCT__ "MatMultTranspose_MPIBAIJ" 1336dfbe8321SBarry Smith PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy) 1337cee3aa6bSSatish Balay { 1338cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1339dfbe8321SBarry Smith PetscErrorCode ierr; 1340ace3abfcSBarry Smith PetscBool merged; 1341cee3aa6bSSatish Balay 1342d64ed03dSBarry Smith PetscFunctionBegin; 1343a5ff213dSBarry Smith ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1344cee3aa6bSSatish Balay /* do nondiagonal part */ 13457c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1346a5ff213dSBarry Smith if (!merged) { 1347cee3aa6bSSatish Balay /* send it on its way */ 1348ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1349cee3aa6bSSatish Balay /* do local part */ 13507c922b88SBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1351cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1352a5ff213dSBarry Smith /* inserted in yy until the next line */ 1353ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1354a5ff213dSBarry Smith } else { 1355a5ff213dSBarry Smith /* do local part */ 1356a5ff213dSBarry Smith ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1357a5ff213dSBarry Smith /* send it on its way */ 
1358ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1359a5ff213dSBarry Smith /* values actually were received in the Begin() but we need to call this nop */ 1360ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1361a5ff213dSBarry Smith } 13623a40ed3dSBarry Smith PetscFunctionReturn(0); 1363cee3aa6bSSatish Balay } 1364cee3aa6bSSatish Balay 13654a2ae208SSatish Balay #undef __FUNCT__ 13664a2ae208SSatish Balay #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ" 1367dfbe8321SBarry Smith PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1368cee3aa6bSSatish Balay { 1369cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1370dfbe8321SBarry Smith PetscErrorCode ierr; 1371cee3aa6bSSatish Balay 1372d64ed03dSBarry Smith PetscFunctionBegin; 1373cee3aa6bSSatish Balay /* do nondiagonal part */ 13747c922b88SBarry Smith ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1375cee3aa6bSSatish Balay /* send it on its way */ 1376ca9f406cSSatish Balay ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1377cee3aa6bSSatish Balay /* do local part */ 13787c922b88SBarry Smith ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1379cee3aa6bSSatish Balay /* receive remote parts: note this assumes the values are not actually */ 1380cee3aa6bSSatish Balay /* inserted in yy until the next line, which is true for my implementation*/ 1381cee3aa6bSSatish Balay /* but is not perhaps always true. 
*/ 1382ca9f406cSSatish Balay ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 13833a40ed3dSBarry Smith PetscFunctionReturn(0); 1384cee3aa6bSSatish Balay } 1385cee3aa6bSSatish Balay 1386cee3aa6bSSatish Balay /* 1387cee3aa6bSSatish Balay This only works correctly for square matrices where the subblock A->A is the 1388cee3aa6bSSatish Balay diagonal block 1389cee3aa6bSSatish Balay */ 13904a2ae208SSatish Balay #undef __FUNCT__ 13914a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonal_MPIBAIJ" 1392dfbe8321SBarry Smith PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v) 1393cee3aa6bSSatish Balay { 1394cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1395dfbe8321SBarry Smith PetscErrorCode ierr; 1396d64ed03dSBarry Smith 1397d64ed03dSBarry Smith PetscFunctionBegin; 1398e32f2f54SBarry Smith if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 13993a40ed3dSBarry Smith ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 14003a40ed3dSBarry Smith PetscFunctionReturn(0); 1401cee3aa6bSSatish Balay } 1402cee3aa6bSSatish Balay 14034a2ae208SSatish Balay #undef __FUNCT__ 14044a2ae208SSatish Balay #define __FUNCT__ "MatScale_MPIBAIJ" 1405f4df32b1SMatthew Knepley PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa) 1406cee3aa6bSSatish Balay { 1407cee3aa6bSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1408dfbe8321SBarry Smith PetscErrorCode ierr; 1409d64ed03dSBarry Smith 1410d64ed03dSBarry Smith PetscFunctionBegin; 1411f4df32b1SMatthew Knepley ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1412f4df32b1SMatthew Knepley ierr = MatScale(a->B,aa);CHKERRQ(ierr); 14133a40ed3dSBarry Smith PetscFunctionReturn(0); 1414cee3aa6bSSatish Balay } 1415026e39d0SSatish Balay 14164a2ae208SSatish Balay #undef __FUNCT__ 14174a2ae208SSatish Balay #define __FUNCT__ "MatGetRow_MPIBAIJ" 1418b24ad042SBarry Smith PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt 
**idx,PetscScalar **v) 1419acdf5bf4SSatish Balay { 1420acdf5bf4SSatish Balay Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 142187828ca2SBarry Smith PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 14226849ba73SBarry Smith PetscErrorCode ierr; 1423d0f46423SBarry Smith PetscInt bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB; 1424d0f46423SBarry Smith PetscInt nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend; 1425899cda47SBarry Smith PetscInt *cmap,*idx_p,cstart = mat->cstartbs; 1426acdf5bf4SSatish Balay 1427d64ed03dSBarry Smith PetscFunctionBegin; 1428e7e72b3dSBarry Smith if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows"); 1429e32f2f54SBarry Smith if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1430acdf5bf4SSatish Balay mat->getrowactive = PETSC_TRUE; 1431acdf5bf4SSatish Balay 1432acdf5bf4SSatish Balay if (!mat->rowvalues && (idx || v)) { 1433acdf5bf4SSatish Balay /* 1434acdf5bf4SSatish Balay allocate enough space to hold information from the longest row. 
1435acdf5bf4SSatish Balay */ 1436acdf5bf4SSatish Balay Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data; 1437b24ad042SBarry Smith PetscInt max = 1,mbs = mat->mbs,tmp; 1438bd16c2feSSatish Balay for (i=0; i<mbs; i++) { 1439acdf5bf4SSatish Balay tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 144026fbe8dcSKarl Rupp if (max < tmp) max = tmp; 1441acdf5bf4SSatish Balay } 1442dcca6d9dSJed Brown ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr); 1443acdf5bf4SSatish Balay } 1444d9d09a02SSatish Balay lrow = row - brstart; 1445acdf5bf4SSatish Balay 1446acdf5bf4SSatish Balay pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1447acdf5bf4SSatish Balay if (!v) {pvA = 0; pvB = 0;} 1448acdf5bf4SSatish Balay if (!idx) {pcA = 0; if (!v) pcB = 0;} 1449f830108cSBarry Smith ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1450f830108cSBarry Smith ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1451acdf5bf4SSatish Balay nztot = nzA + nzB; 1452acdf5bf4SSatish Balay 1453acdf5bf4SSatish Balay cmap = mat->garray; 1454acdf5bf4SSatish Balay if (v || idx) { 1455acdf5bf4SSatish Balay if (nztot) { 1456acdf5bf4SSatish Balay /* Sort by increasing column numbers, assuming A and B already sorted */ 1457b24ad042SBarry Smith PetscInt imark = -1; 1458acdf5bf4SSatish Balay if (v) { 1459acdf5bf4SSatish Balay *v = v_p = mat->rowvalues; 1460acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 1461d9d09a02SSatish Balay if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i]; 1462acdf5bf4SSatish Balay else break; 1463acdf5bf4SSatish Balay } 1464acdf5bf4SSatish Balay imark = i; 1465acdf5bf4SSatish Balay for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1466acdf5bf4SSatish Balay for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1467acdf5bf4SSatish Balay } 1468acdf5bf4SSatish Balay if (idx) { 1469acdf5bf4SSatish Balay *idx = idx_p = mat->rowindices; 1470acdf5bf4SSatish Balay if (imark > -1) { 
1471acdf5bf4SSatish Balay for (i=0; i<imark; i++) { 1472bd16c2feSSatish Balay idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1473acdf5bf4SSatish Balay } 1474acdf5bf4SSatish Balay } else { 1475acdf5bf4SSatish Balay for (i=0; i<nzB; i++) { 147626fbe8dcSKarl Rupp if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs; 1477acdf5bf4SSatish Balay else break; 1478acdf5bf4SSatish Balay } 1479acdf5bf4SSatish Balay imark = i; 1480acdf5bf4SSatish Balay } 1481d9d09a02SSatish Balay for (i=0; i<nzA; i++) idx_p[imark+i] = cstart*bs + cworkA[i]; 1482d9d09a02SSatish Balay for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ; 1483acdf5bf4SSatish Balay } 1484d64ed03dSBarry Smith } else { 1485d212a18eSSatish Balay if (idx) *idx = 0; 1486d212a18eSSatish Balay if (v) *v = 0; 1487d212a18eSSatish Balay } 1488acdf5bf4SSatish Balay } 1489acdf5bf4SSatish Balay *nz = nztot; 1490f830108cSBarry Smith ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1491f830108cSBarry Smith ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 14923a40ed3dSBarry Smith PetscFunctionReturn(0); 1493acdf5bf4SSatish Balay } 1494acdf5bf4SSatish Balay 14954a2ae208SSatish Balay #undef __FUNCT__ 14964a2ae208SSatish Balay #define __FUNCT__ "MatRestoreRow_MPIBAIJ" 1497b24ad042SBarry Smith PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1498acdf5bf4SSatish Balay { 1499acdf5bf4SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 1500d64ed03dSBarry Smith 1501d64ed03dSBarry Smith PetscFunctionBegin; 1502e7e72b3dSBarry Smith if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called"); 1503acdf5bf4SSatish Balay baij->getrowactive = PETSC_FALSE; 15043a40ed3dSBarry Smith PetscFunctionReturn(0); 1505acdf5bf4SSatish Balay } 1506acdf5bf4SSatish Balay 15074a2ae208SSatish Balay #undef __FUNCT__ 15084a2ae208SSatish Balay #define __FUNCT__ 
"MatZeroEntries_MPIBAIJ" 1509dfbe8321SBarry Smith PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 151058667388SSatish Balay { 151158667388SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 1512dfbe8321SBarry Smith PetscErrorCode ierr; 1513d64ed03dSBarry Smith 1514d64ed03dSBarry Smith PetscFunctionBegin; 151558667388SSatish Balay ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 151658667388SSatish Balay ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 15173a40ed3dSBarry Smith PetscFunctionReturn(0); 151858667388SSatish Balay } 15190ac07820SSatish Balay 15204a2ae208SSatish Balay #undef __FUNCT__ 15214a2ae208SSatish Balay #define __FUNCT__ "MatGetInfo_MPIBAIJ" 1522dfbe8321SBarry Smith PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info) 15230ac07820SSatish Balay { 15244e220ebcSLois Curfman McInnes Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data; 15254e220ebcSLois Curfman McInnes Mat A = a->A,B = a->B; 1526dfbe8321SBarry Smith PetscErrorCode ierr; 1527329f5518SBarry Smith PetscReal isend[5],irecv[5]; 15280ac07820SSatish Balay 1529d64ed03dSBarry Smith PetscFunctionBegin; 1530d0f46423SBarry Smith info->block_size = (PetscReal)matin->rmap->bs; 153126fbe8dcSKarl Rupp 15324e220ebcSLois Curfman McInnes ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 153326fbe8dcSKarl Rupp 15340e4b21beSBarry Smith isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1535de87f314SBarry Smith isend[3] = info->memory; isend[4] = info->mallocs; 153626fbe8dcSKarl Rupp 15374e220ebcSLois Curfman McInnes ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 153826fbe8dcSKarl Rupp 15390e4b21beSBarry Smith isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1540de87f314SBarry Smith isend[3] += info->memory; isend[4] += info->mallocs; 154126fbe8dcSKarl Rupp 15420ac07820SSatish Balay if (flag == MAT_LOCAL) { 15434e220ebcSLois Curfman McInnes info->nz_used = isend[0]; 15444e220ebcSLois Curfman McInnes info->nz_allocated = 
isend[1]; 15454e220ebcSLois Curfman McInnes info->nz_unneeded = isend[2]; 15464e220ebcSLois Curfman McInnes info->memory = isend[3]; 15474e220ebcSLois Curfman McInnes info->mallocs = isend[4]; 15480ac07820SSatish Balay } else if (flag == MAT_GLOBAL_MAX) { 1549ce94432eSBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 155026fbe8dcSKarl Rupp 15514e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15524e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15534e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15544e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15554e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 15560ac07820SSatish Balay } else if (flag == MAT_GLOBAL_SUM) { 1557ce94432eSBarry Smith ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 155826fbe8dcSKarl Rupp 15594e220ebcSLois Curfman McInnes info->nz_used = irecv[0]; 15604e220ebcSLois Curfman McInnes info->nz_allocated = irecv[1]; 15614e220ebcSLois Curfman McInnes info->nz_unneeded = irecv[2]; 15624e220ebcSLois Curfman McInnes info->memory = irecv[3]; 15634e220ebcSLois Curfman McInnes info->mallocs = irecv[4]; 1564ce94432eSBarry Smith } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag); 15654e220ebcSLois Curfman McInnes info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 15664e220ebcSLois Curfman McInnes info->fill_ratio_needed = 0; 15674e220ebcSLois Curfman McInnes info->factor_mallocs = 0; 15683a40ed3dSBarry Smith PetscFunctionReturn(0); 15690ac07820SSatish Balay } 15700ac07820SSatish Balay 15714a2ae208SSatish Balay #undef __FUNCT__ 15724a2ae208SSatish Balay #define __FUNCT__ "MatSetOption_MPIBAIJ" 1573ace3abfcSBarry Smith PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg) 157458667388SSatish Balay { 157558667388SSatish Balay Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ*)A->data; 1576dfbe8321SBarry Smith PetscErrorCode ierr; 157758667388SSatish Balay 1578d64ed03dSBarry Smith PetscFunctionBegin; 157912c028f9SKris Buschelman switch (op) { 1580512a5fc5SBarry Smith case MAT_NEW_NONZERO_LOCATIONS: 158112c028f9SKris Buschelman case MAT_NEW_NONZERO_ALLOCATION_ERR: 158228b2fa4aSMatthew Knepley case MAT_UNUSED_NONZERO_LOCATION_ERR: 1583a9817697SBarry Smith case MAT_KEEP_NONZERO_PATTERN: 158412c028f9SKris Buschelman case MAT_NEW_NONZERO_LOCATION_ERR: 15854e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15864e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 158712c028f9SKris Buschelman break; 158812c028f9SKris Buschelman case MAT_ROW_ORIENTED: 15894e0d8c25SBarry Smith a->roworiented = flg; 159026fbe8dcSKarl Rupp 15914e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 15924e0d8c25SBarry Smith ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 159312c028f9SKris Buschelman break; 15944e0d8c25SBarry Smith case MAT_NEW_DIAGONALS: 1595290bbb0aSBarry Smith ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 159612c028f9SKris Buschelman break; 159712c028f9SKris Buschelman case MAT_IGNORE_OFF_PROC_ENTRIES: 15984e0d8c25SBarry Smith a->donotstash = flg; 159912c028f9SKris Buschelman break; 160012c028f9SKris Buschelman case MAT_USE_HASH_TABLE: 16014e0d8c25SBarry Smith a->ht_flag = flg; 160212c028f9SKris Buschelman break; 160377e54ba9SKris Buschelman case MAT_SYMMETRIC: 160477e54ba9SKris Buschelman case MAT_STRUCTURALLY_SYMMETRIC: 16052188ac68SBarry Smith case MAT_HERMITIAN: 16062188ac68SBarry Smith case MAT_SYMMETRY_ETERNAL: 16074e0d8c25SBarry Smith ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 160877e54ba9SKris Buschelman break; 160912c028f9SKris Buschelman default: 1610ce94432eSBarry Smith SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op); 1611d64ed03dSBarry Smith } 16123a40ed3dSBarry Smith PetscFunctionReturn(0); 161358667388SSatish 
Balay } 161458667388SSatish Balay 16154a2ae208SSatish Balay #undef __FUNCT__ 16166a719282SBarry Smith #define __FUNCT__ "MatTranspose_MPIBAIJ" 1617fc4dec0aSBarry Smith PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout) 16180ac07820SSatish Balay { 16190ac07820SSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 16200ac07820SSatish Balay Mat_SeqBAIJ *Aloc; 16210ac07820SSatish Balay Mat B; 1622dfbe8321SBarry Smith PetscErrorCode ierr; 1623d0f46423SBarry Smith PetscInt M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col; 1624d0f46423SBarry Smith PetscInt bs=A->rmap->bs,mbs=baij->mbs; 16253eda8832SBarry Smith MatScalar *a; 16260ac07820SSatish Balay 1627d64ed03dSBarry Smith PetscFunctionBegin; 1628ce94432eSBarry Smith if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1629fc4dec0aSBarry Smith if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1630ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1631d0f46423SBarry Smith ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 16327adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 16332e72b8d9SBarry Smith /* Do not know preallocation information, but must set block size */ 16340298fd71SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr); 1635fc4dec0aSBarry Smith } else { 1636fc4dec0aSBarry Smith B = *matout; 1637fc4dec0aSBarry Smith } 16380ac07820SSatish Balay 16390ac07820SSatish Balay /* copy over the A part */ 16400ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->A->data; 16410ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1642785e854fSJed Brown ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 16430ac07820SSatish Balay 16440ac07820SSatish Balay for (i=0; i<mbs; i++) { 1645899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 164626fbe8dcSKarl 
Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 16470ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 1648899cda47SBarry Smith col = (baij->cstartbs+aj[j])*bs; 16490ac07820SSatish Balay for (k=0; k<bs; k++) { 165097e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 165126fbe8dcSKarl Rupp 16520ac07820SSatish Balay col++; a += bs; 16530ac07820SSatish Balay } 16540ac07820SSatish Balay } 16550ac07820SSatish Balay } 16560ac07820SSatish Balay /* copy over the B part */ 16570ac07820SSatish Balay Aloc = (Mat_SeqBAIJ*)baij->B->data; 16580ac07820SSatish Balay ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 16590ac07820SSatish Balay for (i=0; i<mbs; i++) { 1660899cda47SBarry Smith rvals[0] = bs*(baij->rstartbs + i); 166126fbe8dcSKarl Rupp for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 16620ac07820SSatish Balay for (j=ai[i]; j<ai[i+1]; j++) { 16630ac07820SSatish Balay col = baij->garray[aj[j]]*bs; 16640ac07820SSatish Balay for (k=0; k<bs; k++) { 166597e5c40aSBarry Smith ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr); 166626fbe8dcSKarl Rupp col++; 166726fbe8dcSKarl Rupp a += bs; 16680ac07820SSatish Balay } 16690ac07820SSatish Balay } 16700ac07820SSatish Balay } 1671606d414cSSatish Balay ierr = PetscFree(rvals);CHKERRQ(ierr); 16720ac07820SSatish Balay ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16730ac07820SSatish Balay ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 16740ac07820SSatish Balay 167526fbe8dcSKarl Rupp if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B; 167626fbe8dcSKarl Rupp else { 1677eb6b5d47SBarry Smith ierr = MatHeaderMerge(A,B);CHKERRQ(ierr); 16780ac07820SSatish Balay } 16793a40ed3dSBarry Smith PetscFunctionReturn(0); 16800ac07820SSatish Balay } 16810e95ebc0SSatish Balay 16824a2ae208SSatish Balay #undef __FUNCT__ 16834a2ae208SSatish Balay #define __FUNCT__ "MatDiagonalScale_MPIBAIJ" 1684dfbe8321SBarry Smith PetscErrorCode 
MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr) 16850e95ebc0SSatish Balay { 168636c4a09eSSatish Balay Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 168736c4a09eSSatish Balay Mat a = baij->A,b = baij->B; 1688dfbe8321SBarry Smith PetscErrorCode ierr; 1689b24ad042SBarry Smith PetscInt s1,s2,s3; 16900e95ebc0SSatish Balay 1691d64ed03dSBarry Smith PetscFunctionBegin; 169236c4a09eSSatish Balay ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 169336c4a09eSSatish Balay if (rr) { 169436c4a09eSSatish Balay ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 1695e32f2f54SBarry Smith if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 169636c4a09eSSatish Balay /* Overlap communication with computation. */ 1697ca9f406cSSatish Balay ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 169836c4a09eSSatish Balay } 16990e95ebc0SSatish Balay if (ll) { 17000e95ebc0SSatish Balay ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 1701e32f2f54SBarry Smith if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 17020298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 17030e95ebc0SSatish Balay } 170436c4a09eSSatish Balay /* scale the diagonal block */ 170536c4a09eSSatish Balay ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 170636c4a09eSSatish Balay 170736c4a09eSSatish Balay if (rr) { 170836c4a09eSSatish Balay /* Do a scatter end and then right scale the off-diagonal block */ 1709ca9f406cSSatish Balay ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 17100298fd71SBarry Smith ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr); 171136c4a09eSSatish Balay } 17123a40ed3dSBarry Smith PetscFunctionReturn(0); 17130e95ebc0SSatish Balay } 17140e95ebc0SSatish Balay 17154a2ae208SSatish Balay #undef __FUNCT__ 17164a2ae208SSatish Balay #define __FUNCT__ "MatZeroRows_MPIBAIJ" 17172b40b63fSBarry Smith 
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 17180ac07820SSatish Balay { 17190ac07820SSatish Balay Mat_MPIBAIJ *l = (Mat_MPIBAIJ *) A->data; 172065a92638SMatthew G. Knepley PetscInt *owners = A->rmap->range; 172165a92638SMatthew G. Knepley PetscInt n = A->rmap->n; 172265a92638SMatthew G. Knepley PetscSF sf; 172365a92638SMatthew G. Knepley PetscInt *lrows; 172465a92638SMatthew G. Knepley PetscSFNode *rrows; 172569ea2d38SJed Brown PetscInt r, p = 0, len = 0; 17266849ba73SBarry Smith PetscErrorCode ierr; 17270ac07820SSatish Balay 1728d64ed03dSBarry Smith PetscFunctionBegin; 172965a92638SMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 1730785e854fSJed Brown ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 173165a92638SMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 1732a34163a4SJed Brown if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);} 173365a92638SMatthew G. Knepley for (r = 0; r < N; ++r) { 173465a92638SMatthew G. Knepley const PetscInt idx = rows[r]; 173569ea2d38SJed Brown if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 173669ea2d38SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 173769ea2d38SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 173869ea2d38SJed Brown } 1739a34163a4SJed Brown if (A->nooffproczerorows) { 1740a34163a4SJed Brown if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank); 1741a34163a4SJed Brown lrows[len++] = idx - owners[p]; 1742a34163a4SJed Brown } else { 174365a92638SMatthew G. Knepley rrows[r].rank = p; 174465a92638SMatthew G. 
Knepley rrows[r].index = rows[r] - owners[p]; 17450ac07820SSatish Balay } 1746a34163a4SJed Brown } 1747a34163a4SJed Brown if (!A->nooffproczerorows) { 174865a92638SMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 174965a92638SMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 175065a92638SMatthew G. Knepley /* Collect flags for rows to be zeroed */ 175165a92638SMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 175265a92638SMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 175365a92638SMatthew G. Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 175465a92638SMatthew G. Knepley /* Compress and put in row numbers */ 175565a92638SMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1756a34163a4SJed Brown } 175797b48c8fSBarry Smith /* fix right hand side if needed */ 175897b48c8fSBarry Smith if (x && b) { 175965a92638SMatthew G. Knepley const PetscScalar *xx; 176065a92638SMatthew G. Knepley PetscScalar *bb; 176165a92638SMatthew G. Knepley 176297b48c8fSBarry Smith ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 176397b48c8fSBarry Smith ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 176465a92638SMatthew G. Knepley for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 176597b48c8fSBarry Smith ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 176697b48c8fSBarry Smith ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 176797b48c8fSBarry Smith } 176897b48c8fSBarry Smith 17690ac07820SSatish Balay /* actually zap the local rows */ 177072dacd9aSBarry Smith /* 177172dacd9aSBarry Smith Zero the required rows. 
If the "diagonal block" of the matrix 1772a8c7a070SBarry Smith is square and the user wishes to set the diagonal we use separate 177372dacd9aSBarry Smith code so that MatSetValues() is not called for each diagonal allocating 177472dacd9aSBarry Smith new memory, thus calling lots of mallocs and slowing things down. 177572dacd9aSBarry Smith 177672dacd9aSBarry Smith */ 17779c957beeSSatish Balay /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1778a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1779d0f46423SBarry Smith if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) { 1780a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr); 1781f4df32b1SMatthew Knepley } else if (diag != 0.0) { 178265a92638SMatthew G. Knepley ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr); 1783e7e72b3dSBarry Smith if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1784512a5fc5SBarry Smith MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 178565a92638SMatthew G. Knepley for (r = 0; r < len; ++r) { 178665a92638SMatthew G. 
Knepley const PetscInt row = lrows[r] + A->rmap->rstart; 1787f4df32b1SMatthew Knepley ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 1788a07cd24cSSatish Balay } 1789a07cd24cSSatish Balay ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1790a07cd24cSSatish Balay ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 17919c957beeSSatish Balay } else { 1792a34163a4SJed Brown ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 1793a07cd24cSSatish Balay } 1794606d414cSSatish Balay ierr = PetscFree(lrows);CHKERRQ(ierr); 17954f9cfa9eSBarry Smith 17964f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 17974f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 1798e56f5c9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 179909e82e2bSBarry Smith ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1800e56f5c9eSBarry Smith } 18013a40ed3dSBarry Smith PetscFunctionReturn(0); 18020ac07820SSatish Balay } 180372dacd9aSBarry Smith 18044a2ae208SSatish Balay #undef __FUNCT__ 18056f0a72daSMatthew G. Knepley #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ" 18066f0a72daSMatthew G. Knepley PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 18076f0a72daSMatthew G. Knepley { 18086f0a72daSMatthew G. Knepley Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data; 18096f0a72daSMatthew G. Knepley PetscErrorCode ierr; 18105ba17502SJed Brown PetscMPIInt n = A->rmap->n; 1811fbb64d0eSMatthew G. Knepley PetscInt i,j,k,r,p = 0,len = 0,row,col,count; 18126f0a72daSMatthew G. Knepley PetscInt *lrows,*owners = A->rmap->range; 18136f0a72daSMatthew G. Knepley PetscSFNode *rrows; 18146f0a72daSMatthew G. Knepley PetscSF sf; 18156f0a72daSMatthew G. Knepley const PetscScalar *xx; 18166f0a72daSMatthew G. 
Knepley PetscScalar *bb,*mask; 18176f0a72daSMatthew G. Knepley Vec xmask,lmask; 18186f0a72daSMatthew G. Knepley Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)l->B->data; 18196f0a72daSMatthew G. Knepley PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 18206f0a72daSMatthew G. Knepley PetscScalar *aa; 18216f0a72daSMatthew G. Knepley 18226f0a72daSMatthew G. Knepley PetscFunctionBegin; 18236f0a72daSMatthew G. Knepley /* Create SF where leaves are input rows and roots are owned rows */ 18246f0a72daSMatthew G. Knepley ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 18256f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) lrows[r] = -1; 18266f0a72daSMatthew G. Knepley ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 18276f0a72daSMatthew G. Knepley for (r = 0; r < N; ++r) { 18286f0a72daSMatthew G. Knepley const PetscInt idx = rows[r]; 18295ba17502SJed Brown if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 18305ba17502SJed Brown if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 18315ba17502SJed Brown ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 18325ba17502SJed Brown } 18336f0a72daSMatthew G. Knepley rrows[r].rank = p; 18346f0a72daSMatthew G. Knepley rrows[r].index = rows[r] - owners[p]; 18356f0a72daSMatthew G. Knepley } 18366f0a72daSMatthew G. Knepley ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 18376f0a72daSMatthew G. Knepley ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 18386f0a72daSMatthew G. Knepley /* Collect flags for rows to be zeroed */ 18396f0a72daSMatthew G. Knepley ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 18406f0a72daSMatthew G. Knepley ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 18416f0a72daSMatthew G. 
Knepley ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 18426f0a72daSMatthew G. Knepley /* Compress and put in row numbers */ 18436f0a72daSMatthew G. Knepley for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 18446f0a72daSMatthew G. Knepley /* zero diagonal part of matrix */ 18456f0a72daSMatthew G. Knepley ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 18466f0a72daSMatthew G. Knepley /* handle off diagonal part of matrix */ 18472a7a6963SBarry Smith ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 18486f0a72daSMatthew G. Knepley ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 18496f0a72daSMatthew G. Knepley ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 18506f0a72daSMatthew G. Knepley for (i=0; i<len; i++) bb[lrows[i]] = 1; 18516f0a72daSMatthew G. Knepley ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 18526f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18536f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18546f0a72daSMatthew G. Knepley ierr = VecDestroy(&xmask);CHKERRQ(ierr); 18556f0a72daSMatthew G. Knepley if (x) { 18566f0a72daSMatthew G. Knepley ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18576f0a72daSMatthew G. Knepley ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 18586f0a72daSMatthew G. Knepley ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 18596f0a72daSMatthew G. Knepley ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 18606f0a72daSMatthew G. Knepley } 18616f0a72daSMatthew G. Knepley ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 18626f0a72daSMatthew G. Knepley /* remove zeroed rows of off diagonal matrix */ 18636f0a72daSMatthew G. Knepley for (i = 0; i < len; ++i) { 18646f0a72daSMatthew G. Knepley row = lrows[i]; 18656f0a72daSMatthew G. Knepley count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 18666f0a72daSMatthew G. 
Knepley aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 18676f0a72daSMatthew G. Knepley for (k = 0; k < count; ++k) { 18686f0a72daSMatthew G. Knepley aa[0] = 0.0; 18696f0a72daSMatthew G. Knepley aa += bs; 18706f0a72daSMatthew G. Knepley } 18716f0a72daSMatthew G. Knepley } 18726f0a72daSMatthew G. Knepley /* loop over all elements of off process part of matrix zeroing removed columns*/ 18736f0a72daSMatthew G. Knepley for (i = 0; i < l->B->rmap->N; ++i) { 18746f0a72daSMatthew G. Knepley row = i/bs; 18756f0a72daSMatthew G. Knepley for (j = baij->i[row]; j < baij->i[row+1]; ++j) { 18766f0a72daSMatthew G. Knepley for (k = 0; k < bs; ++k) { 18776f0a72daSMatthew G. Knepley col = bs*baij->j[j] + k; 18786f0a72daSMatthew G. Knepley if (PetscAbsScalar(mask[col])) { 18796f0a72daSMatthew G. Knepley aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 18806f0a72daSMatthew G. Knepley if (b) bb[i] -= aa[0]*xx[col]; 18816f0a72daSMatthew G. Knepley aa[0] = 0.0; 18826f0a72daSMatthew G. Knepley } 18836f0a72daSMatthew G. Knepley } 18846f0a72daSMatthew G. Knepley } 18856f0a72daSMatthew G. Knepley } 18866f0a72daSMatthew G. Knepley if (x) { 18876f0a72daSMatthew G. Knepley ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 18886f0a72daSMatthew G. Knepley ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 18896f0a72daSMatthew G. Knepley } 18906f0a72daSMatthew G. Knepley ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 18916f0a72daSMatthew G. Knepley ierr = VecDestroy(&lmask);CHKERRQ(ierr); 18926f0a72daSMatthew G. 
Knepley ierr = PetscFree(lrows);CHKERRQ(ierr); 18934f9cfa9eSBarry Smith 18944f9cfa9eSBarry Smith /* only change matrix nonzero state if pattern was allowed to be changed */ 18954f9cfa9eSBarry Smith if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) { 18964f9cfa9eSBarry Smith PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 18974f9cfa9eSBarry Smith ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 18984f9cfa9eSBarry Smith } 18996f0a72daSMatthew G. Knepley PetscFunctionReturn(0); 19006f0a72daSMatthew G. Knepley } 19016f0a72daSMatthew G. Knepley 19026f0a72daSMatthew G. Knepley #undef __FUNCT__ 19034a2ae208SSatish Balay #define __FUNCT__ "MatSetUnfactored_MPIBAIJ" 1904dfbe8321SBarry Smith PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1905bb5a7306SBarry Smith { 1906bb5a7306SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 1907dfbe8321SBarry Smith PetscErrorCode ierr; 1908d64ed03dSBarry Smith 1909d64ed03dSBarry Smith PetscFunctionBegin; 1910bb5a7306SBarry Smith ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 19113a40ed3dSBarry Smith PetscFunctionReturn(0); 1912bb5a7306SBarry Smith } 1913bb5a7306SBarry Smith 19146849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*); 19150ac07820SSatish Balay 19164a2ae208SSatish Balay #undef __FUNCT__ 19174a2ae208SSatish Balay #define __FUNCT__ "MatEqual_MPIBAIJ" 1918ace3abfcSBarry Smith PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool *flag) 19197fc3c18eSBarry Smith { 19207fc3c18eSBarry Smith Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data; 19217fc3c18eSBarry Smith Mat a,b,c,d; 1922ace3abfcSBarry Smith PetscBool flg; 1923dfbe8321SBarry Smith PetscErrorCode ierr; 19247fc3c18eSBarry Smith 19257fc3c18eSBarry Smith PetscFunctionBegin; 19267fc3c18eSBarry Smith a = matA->A; b = matA->B; 19277fc3c18eSBarry Smith c = matB->A; d = matB->B; 19287fc3c18eSBarry Smith 19297fc3c18eSBarry Smith ierr = 
MatEqual(a,c,&flg);CHKERRQ(ierr); 1930abc0a331SBarry Smith if (flg) { 19317fc3c18eSBarry Smith ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 19327fc3c18eSBarry Smith } 1933ce94432eSBarry Smith ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 19347fc3c18eSBarry Smith PetscFunctionReturn(0); 19357fc3c18eSBarry Smith } 19367fc3c18eSBarry Smith 19373c896bc6SHong Zhang #undef __FUNCT__ 19383c896bc6SHong Zhang #define __FUNCT__ "MatCopy_MPIBAIJ" 19393c896bc6SHong Zhang PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str) 19403c896bc6SHong Zhang { 19413c896bc6SHong Zhang PetscErrorCode ierr; 19423c896bc6SHong Zhang Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 19433c896bc6SHong Zhang Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 19443c896bc6SHong Zhang 19453c896bc6SHong Zhang PetscFunctionBegin; 19463c896bc6SHong Zhang /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 19473c896bc6SHong Zhang if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 19483c896bc6SHong Zhang ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 19493c896bc6SHong Zhang } else { 19503c896bc6SHong Zhang ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 19513c896bc6SHong Zhang ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 19523c896bc6SHong Zhang } 19533c896bc6SHong Zhang PetscFunctionReturn(0); 19543c896bc6SHong Zhang } 1955273d9f13SBarry Smith 19564a2ae208SSatish Balay #undef __FUNCT__ 19574994cf47SJed Brown #define __FUNCT__ "MatSetUp_MPIBAIJ" 19584994cf47SJed Brown PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1959273d9f13SBarry Smith { 1960dfbe8321SBarry Smith PetscErrorCode ierr; 1961273d9f13SBarry Smith 1962273d9f13SBarry Smith PetscFunctionBegin; 1963535b19f3SBarry Smith ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 1964273d9f13SBarry Smith PetscFunctionReturn(0); 1965273d9f13SBarry Smith } 1966273d9f13SBarry Smith 19674fe895cdSHong Zhang #undef 
__FUNCT__ 19684de5dceeSHong Zhang #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ" 19694de5dceeSHong Zhang PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 19704de5dceeSHong Zhang { 1971001ddc4fSHong Zhang PetscErrorCode ierr; 1972001ddc4fSHong Zhang PetscInt bs = Y->rmap->bs,m = Y->rmap->N/bs; 19734de5dceeSHong Zhang Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 19744de5dceeSHong Zhang Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 19754de5dceeSHong Zhang 19764de5dceeSHong Zhang PetscFunctionBegin; 1977001ddc4fSHong Zhang ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 19784de5dceeSHong Zhang PetscFunctionReturn(0); 19794de5dceeSHong Zhang } 19804de5dceeSHong Zhang 19814de5dceeSHong Zhang #undef __FUNCT__ 19824fe895cdSHong Zhang #define __FUNCT__ "MatAXPY_MPIBAIJ" 19834fe895cdSHong Zhang PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 19844fe895cdSHong Zhang { 19854fe895cdSHong Zhang PetscErrorCode ierr; 19864fe895cdSHong Zhang Mat_MPIBAIJ *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data; 19874fe895cdSHong Zhang PetscBLASInt bnz,one=1; 19884fe895cdSHong Zhang Mat_SeqBAIJ *x,*y; 19894fe895cdSHong Zhang 19904fe895cdSHong Zhang PetscFunctionBegin; 19914fe895cdSHong Zhang if (str == SAME_NONZERO_PATTERN) { 19924fe895cdSHong Zhang PetscScalar alpha = a; 19934fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->A->data; 19944fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->A->data; 1995c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 19968b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 19974fe895cdSHong Zhang x = (Mat_SeqBAIJ*)xx->B->data; 19984fe895cdSHong Zhang y = (Mat_SeqBAIJ*)yy->B->data; 1999c5df96a5SBarry Smith ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 20008b83055fSJed Brown PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2001a3fa217bSJose E. 
Roman ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2002ab784542SHong Zhang } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2003ab784542SHong Zhang ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 20044fe895cdSHong Zhang } else { 20054de5dceeSHong Zhang Mat B; 20064de5dceeSHong Zhang PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs; 20074de5dceeSHong Zhang ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 20084de5dceeSHong Zhang ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 20094de5dceeSHong Zhang ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 20104de5dceeSHong Zhang ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 20114de5dceeSHong Zhang ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 20124de5dceeSHong Zhang ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 20134de5dceeSHong Zhang ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr); 20144de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 20154de5dceeSHong Zhang ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 20164de5dceeSHong Zhang ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 20174de5dceeSHong Zhang /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 20184de5dceeSHong Zhang ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 20194de5dceeSHong Zhang ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr); 20204de5dceeSHong Zhang ierr = PetscFree(nnz_d);CHKERRQ(ierr); 20214de5dceeSHong Zhang ierr = PetscFree(nnz_o);CHKERRQ(ierr); 20224fe895cdSHong Zhang } 20234fe895cdSHong Zhang PetscFunctionReturn(0); 20244fe895cdSHong Zhang } 20254fe895cdSHong Zhang 202699cafbc1SBarry Smith #undef __FUNCT__ 202799cafbc1SBarry Smith #define __FUNCT__ "MatRealPart_MPIBAIJ" 202899cafbc1SBarry Smith PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 202999cafbc1SBarry Smith { 203099cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 203199cafbc1SBarry Smith PetscErrorCode ierr; 203299cafbc1SBarry Smith 203399cafbc1SBarry Smith PetscFunctionBegin; 203499cafbc1SBarry Smith ierr = MatRealPart(a->A);CHKERRQ(ierr); 203599cafbc1SBarry Smith ierr = MatRealPart(a->B);CHKERRQ(ierr); 203699cafbc1SBarry Smith PetscFunctionReturn(0); 203799cafbc1SBarry Smith } 203899cafbc1SBarry Smith 203999cafbc1SBarry Smith #undef __FUNCT__ 204099cafbc1SBarry Smith #define __FUNCT__ "MatImaginaryPart_MPIBAIJ" 204199cafbc1SBarry Smith PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 204299cafbc1SBarry Smith { 204399cafbc1SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 204499cafbc1SBarry Smith PetscErrorCode ierr; 204599cafbc1SBarry Smith 204699cafbc1SBarry Smith PetscFunctionBegin; 204799cafbc1SBarry Smith ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 204899cafbc1SBarry Smith ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 204999cafbc1SBarry Smith PetscFunctionReturn(0); 205099cafbc1SBarry Smith } 205199cafbc1SBarry Smith 205282094794SBarry Smith #undef __FUNCT__ 205382094794SBarry Smith #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ" 20544aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 20554aa3045dSJed Brown { 20564aa3045dSJed Brown PetscErrorCode ierr; 20574aa3045dSJed Brown IS iscol_local; 
20584aa3045dSJed Brown PetscInt csize; 20594aa3045dSJed Brown 20604aa3045dSJed Brown PetscFunctionBegin; 20614aa3045dSJed Brown ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 2062b79d0421SJed Brown if (call == MAT_REUSE_MATRIX) { 2063b79d0421SJed Brown ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 2064e32f2f54SBarry Smith if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 2065b79d0421SJed Brown } else { 20664aa3045dSJed Brown ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2067b79d0421SJed Brown } 20684aa3045dSJed Brown ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 2069b79d0421SJed Brown if (call == MAT_INITIAL_MATRIX) { 2070b79d0421SJed Brown ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 20716bf464f9SBarry Smith ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 2072b79d0421SJed Brown } 20734aa3045dSJed Brown PetscFunctionReturn(0); 20744aa3045dSJed Brown } 207529dcf524SDmitry Karpeev extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*); 20764aa3045dSJed Brown #undef __FUNCT__ 2077dd183c9eSJed Brown #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private" 207882094794SBarry Smith /* 207982094794SBarry Smith Not great since it makes two copies of the submatrix, first an SeqBAIJ 208082094794SBarry Smith in local and then by concatenating the local matrices the end result. 20818f46ffcaSHong Zhang Writing it directly would be much like MatGetSubMatrices_MPIBAIJ(). 20828f46ffcaSHong Zhang This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 
208382094794SBarry Smith */ 20844aa3045dSJed Brown PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 208582094794SBarry Smith { 208682094794SBarry Smith PetscErrorCode ierr; 208782094794SBarry Smith PetscMPIInt rank,size; 208882094794SBarry Smith PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs; 208929dcf524SDmitry Karpeev PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow; 209029dcf524SDmitry Karpeev Mat M,Mreuse; 209182094794SBarry Smith MatScalar *vwork,*aa; 2092ce94432eSBarry Smith MPI_Comm comm; 209329dcf524SDmitry Karpeev IS isrow_new, iscol_new; 209429dcf524SDmitry Karpeev PetscBool idflag,allrows, allcols; 209582094794SBarry Smith Mat_SeqBAIJ *aij; 209682094794SBarry Smith 209782094794SBarry Smith PetscFunctionBegin; 2098ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 209982094794SBarry Smith ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 210082094794SBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 210129dcf524SDmitry Karpeev /* The compression and expansion should be avoided. 
Doesn't point 210229dcf524SDmitry Karpeev out errors, might change the indices, hence buggey */ 210329dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr); 210429dcf524SDmitry Karpeev ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr); 210582094794SBarry Smith 210629dcf524SDmitry Karpeev /* Check for special case: each processor gets entire matrix columns */ 210729dcf524SDmitry Karpeev ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr); 210829dcf524SDmitry Karpeev ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 210926fbe8dcSKarl Rupp if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE; 211026fbe8dcSKarl Rupp else allcols = PETSC_FALSE; 211129dcf524SDmitry Karpeev 211229dcf524SDmitry Karpeev ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr); 211329dcf524SDmitry Karpeev ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr); 211426fbe8dcSKarl Rupp if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE; 211526fbe8dcSKarl Rupp else allrows = PETSC_FALSE; 211626fbe8dcSKarl Rupp 211782094794SBarry Smith if (call == MAT_REUSE_MATRIX) { 211882094794SBarry Smith ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 2119e32f2f54SBarry Smith if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 212075f6568bSJed Brown ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 212182094794SBarry Smith } else { 212275f6568bSJed Brown ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr); 212382094794SBarry Smith } 212429dcf524SDmitry Karpeev ierr = ISDestroy(&isrow_new);CHKERRQ(ierr); 212529dcf524SDmitry Karpeev ierr = ISDestroy(&iscol_new);CHKERRQ(ierr); 212682094794SBarry Smith /* 212782094794SBarry Smith m - 
number of local rows 212882094794SBarry Smith n - number of columns (same on all processors) 212982094794SBarry Smith rstart - first row in new global matrix generated 213082094794SBarry Smith */ 213182094794SBarry Smith ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr); 213282094794SBarry Smith ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 213382094794SBarry Smith m = m/bs; 213482094794SBarry Smith n = n/bs; 213582094794SBarry Smith 213682094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 213782094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 213882094794SBarry Smith ii = aij->i; 213982094794SBarry Smith jj = aij->j; 214082094794SBarry Smith 214182094794SBarry Smith /* 214282094794SBarry Smith Determine the number of non-zeros in the diagonal and off-diagonal 214382094794SBarry Smith portions of the matrix in order to do correct preallocation 214482094794SBarry Smith */ 214582094794SBarry Smith 214682094794SBarry Smith /* first get start and end of "diagonal" columns */ 214782094794SBarry Smith if (csize == PETSC_DECIDE) { 214882094794SBarry Smith ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 214982094794SBarry Smith if (mglobal == n*bs) { /* square matrix */ 215082094794SBarry Smith nlocal = m; 215182094794SBarry Smith } else { 215282094794SBarry Smith nlocal = n/size + ((n % size) > rank); 215382094794SBarry Smith } 215482094794SBarry Smith } else { 215582094794SBarry Smith nlocal = csize/bs; 215682094794SBarry Smith } 215782094794SBarry Smith ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 215882094794SBarry Smith rstart = rend - nlocal; 215965e19b50SBarry Smith if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 216082094794SBarry Smith 216182094794SBarry Smith /* next, compute all the lengths */ 2162dcca6d9dSJed Brown ierr = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr); 216382094794SBarry Smith for (i=0; i<m; i++) { 
216482094794SBarry Smith jend = ii[i+1] - ii[i]; 216582094794SBarry Smith olen = 0; 216682094794SBarry Smith dlen = 0; 216782094794SBarry Smith for (j=0; j<jend; j++) { 216882094794SBarry Smith if (*jj < rstart || *jj >= rend) olen++; 216982094794SBarry Smith else dlen++; 217082094794SBarry Smith jj++; 217182094794SBarry Smith } 217282094794SBarry Smith olens[i] = olen; 217382094794SBarry Smith dlens[i] = dlen; 217482094794SBarry Smith } 217582094794SBarry Smith ierr = MatCreate(comm,&M);CHKERRQ(ierr); 217682094794SBarry Smith ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr); 217782094794SBarry Smith ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 217882094794SBarry Smith ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 21798f46ffcaSHong Zhang ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr); 2180eb9baa12SBarry Smith ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 218182094794SBarry Smith } else { 218282094794SBarry Smith PetscInt ml,nl; 218382094794SBarry Smith 218482094794SBarry Smith M = *newmat; 218582094794SBarry Smith ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 2186e32f2f54SBarry Smith if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 218782094794SBarry Smith ierr = MatZeroEntries(M);CHKERRQ(ierr); 218882094794SBarry Smith /* 218982094794SBarry Smith The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 219082094794SBarry Smith rather than the slower MatSetValues(). 
219182094794SBarry Smith */ 219282094794SBarry Smith M->was_assembled = PETSC_TRUE; 219382094794SBarry Smith M->assembled = PETSC_FALSE; 219482094794SBarry Smith } 219582094794SBarry Smith ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 219682094794SBarry Smith ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 219782094794SBarry Smith aij = (Mat_SeqBAIJ*)(Mreuse)->data; 219882094794SBarry Smith ii = aij->i; 219982094794SBarry Smith jj = aij->j; 220082094794SBarry Smith aa = aij->a; 220182094794SBarry Smith for (i=0; i<m; i++) { 220282094794SBarry Smith row = rstart/bs + i; 220382094794SBarry Smith nz = ii[i+1] - ii[i]; 220482094794SBarry Smith cwork = jj; jj += nz; 220575f6568bSJed Brown vwork = aa; aa += nz*bs*bs; 220682094794SBarry Smith ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 220782094794SBarry Smith } 220882094794SBarry Smith 220982094794SBarry Smith ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 221082094794SBarry Smith ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 221182094794SBarry Smith *newmat = M; 221282094794SBarry Smith 221382094794SBarry Smith /* save submatrix used in processor for next request */ 221482094794SBarry Smith if (call == MAT_INITIAL_MATRIX) { 221582094794SBarry Smith ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 221682094794SBarry Smith ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr); 221782094794SBarry Smith } 221882094794SBarry Smith PetscFunctionReturn(0); 221982094794SBarry Smith } 222082094794SBarry Smith 222182094794SBarry Smith #undef __FUNCT__ 222282094794SBarry Smith #define __FUNCT__ "MatPermute_MPIBAIJ" 222382094794SBarry Smith PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B) 222482094794SBarry Smith { 222582094794SBarry Smith MPI_Comm comm,pcomm; 2226a0a83eb5SRémi Lacroix PetscInt clocal_size,nrows; 222782094794SBarry Smith const PetscInt *rows; 
2228dbf0e21dSBarry Smith PetscMPIInt size; 2229a0a83eb5SRémi Lacroix IS crowp,lcolp; 223082094794SBarry Smith PetscErrorCode ierr; 223182094794SBarry Smith 223282094794SBarry Smith PetscFunctionBegin; 223382094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 223482094794SBarry Smith /* make a collective version of 'rowp' */ 223582094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr); 223682094794SBarry Smith if (pcomm==comm) { 223782094794SBarry Smith crowp = rowp; 223882094794SBarry Smith } else { 223982094794SBarry Smith ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr); 224082094794SBarry Smith ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr); 224170b3c8c7SBarry Smith ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr); 224282094794SBarry Smith ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr); 224382094794SBarry Smith } 2244a0a83eb5SRémi Lacroix ierr = ISSetPermutation(crowp);CHKERRQ(ierr); 2245a0a83eb5SRémi Lacroix /* make a local version of 'colp' */ 224682094794SBarry Smith ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr); 2247dbf0e21dSBarry Smith ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr); 2248dbf0e21dSBarry Smith if (size==1) { 224982094794SBarry Smith lcolp = colp; 225082094794SBarry Smith } else { 225175f6568bSJed Brown ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr); 225282094794SBarry Smith } 2253dbf0e21dSBarry Smith ierr = ISSetPermutation(lcolp);CHKERRQ(ierr); 225475f6568bSJed Brown /* now we just get the submatrix */ 22557afc1a8bSJed Brown ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr); 2256a0a83eb5SRémi Lacroix ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2257a0a83eb5SRémi Lacroix /* clean up */ 2258a0a83eb5SRémi Lacroix if (pcomm!=comm) { 2259a0a83eb5SRémi Lacroix ierr = ISDestroy(&crowp);CHKERRQ(ierr); 2260a0a83eb5SRémi Lacroix } 2261dbf0e21dSBarry Smith if (size>1) { 
22626bf464f9SBarry Smith ierr = ISDestroy(&lcolp);CHKERRQ(ierr); 226382094794SBarry Smith } 226482094794SBarry Smith PetscFunctionReturn(0); 226582094794SBarry Smith } 226682094794SBarry Smith 22678c7482ecSBarry Smith #undef __FUNCT__ 22688c7482ecSBarry Smith #define __FUNCT__ "MatGetGhosts_MPIBAIJ" 22697087cfbeSBarry Smith PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 22708c7482ecSBarry Smith { 22718c7482ecSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data; 22728c7482ecSBarry Smith Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data; 22738c7482ecSBarry Smith 22748c7482ecSBarry Smith PetscFunctionBegin; 227526fbe8dcSKarl Rupp if (nghosts) *nghosts = B->nbs; 227626fbe8dcSKarl Rupp if (ghosts) *ghosts = baij->garray; 22778c7482ecSBarry Smith PetscFunctionReturn(0); 22788c7482ecSBarry Smith } 22798c7482ecSBarry Smith 2280f6d58c54SBarry Smith #undef __FUNCT__ 2281d1adec66SJed Brown #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ" 2282d1adec66SJed Brown PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat) 2283f6d58c54SBarry Smith { 2284f6d58c54SBarry Smith Mat B; 2285f6d58c54SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 2286f6d58c54SBarry Smith Mat_SeqBAIJ *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data; 2287f6d58c54SBarry Smith Mat_SeqAIJ *b; 2288f6d58c54SBarry Smith PetscErrorCode ierr; 2289f6d58c54SBarry Smith PetscMPIInt size,rank,*recvcounts = 0,*displs = 0; 2290f6d58c54SBarry Smith PetscInt sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs; 2291f6d58c54SBarry Smith PetscInt m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf; 2292f6d58c54SBarry Smith 2293f6d58c54SBarry Smith PetscFunctionBegin; 2294ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 2295ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 2296f6d58c54SBarry Smith 2297f6d58c54SBarry Smith /* 
---------------------------------------------------------------- 2298f6d58c54SBarry Smith Tell every processor the number of nonzeros per row 2299f6d58c54SBarry Smith */ 2300854ce69bSBarry Smith ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr); 2301f6d58c54SBarry Smith for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) { 2302f6d58c54SBarry Smith lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs]; 2303f6d58c54SBarry Smith } 2304f6d58c54SBarry Smith sendcount = A->rmap->rend/bs - A->rmap->rstart/bs; 2305785e854fSJed Brown ierr = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr); 2306f6d58c54SBarry Smith displs = recvcounts + size; 2307f6d58c54SBarry Smith for (i=0; i<size; i++) { 2308f6d58c54SBarry Smith recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs; 2309f6d58c54SBarry Smith displs[i] = A->rmap->range[i]/bs; 2310f6d58c54SBarry Smith } 2311f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2312ce94432eSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2313f6d58c54SBarry Smith #else 2314ce94432eSBarry Smith ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2315f6d58c54SBarry Smith #endif 2316f6d58c54SBarry Smith /* --------------------------------------------------------------- 2317f6d58c54SBarry Smith Create the sequential matrix of the same type as the local block diagonal 2318f6d58c54SBarry Smith */ 2319f6d58c54SBarry Smith ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 2320f6d58c54SBarry Smith ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 2321f6d58c54SBarry Smith ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 2322f6d58c54SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr); 2323f6d58c54SBarry Smith b 
= (Mat_SeqAIJ*)B->data; 2324f6d58c54SBarry Smith 2325f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2326f6d58c54SBarry Smith Copy my part of matrix column indices over 2327f6d58c54SBarry Smith */ 2328f6d58c54SBarry Smith sendcount = ad->nz + bd->nz; 2329f6d58c54SBarry Smith jsendbuf = b->j + b->i[rstarts[rank]/bs]; 2330f6d58c54SBarry Smith a_jsendbuf = ad->j; 2331f6d58c54SBarry Smith b_jsendbuf = bd->j; 2332f6d58c54SBarry Smith n = A->rmap->rend/bs - A->rmap->rstart/bs; 2333f6d58c54SBarry Smith cnt = 0; 2334f6d58c54SBarry Smith for (i=0; i<n; i++) { 2335f6d58c54SBarry Smith 2336f6d58c54SBarry Smith /* put in lower diagonal portion */ 2337f6d58c54SBarry Smith m = bd->i[i+1] - bd->i[i]; 2338f6d58c54SBarry Smith while (m > 0) { 2339f6d58c54SBarry Smith /* is it above diagonal (in bd (compressed) numbering) */ 2340f6d58c54SBarry Smith if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break; 2341f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2342f6d58c54SBarry Smith m--; 2343f6d58c54SBarry Smith } 2344f6d58c54SBarry Smith 2345f6d58c54SBarry Smith /* put in diagonal portion */ 2346f6d58c54SBarry Smith for (j=ad->i[i]; j<ad->i[i+1]; j++) { 2347f6d58c54SBarry Smith jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++; 2348f6d58c54SBarry Smith } 2349f6d58c54SBarry Smith 2350f6d58c54SBarry Smith /* put in upper diagonal portion */ 2351f6d58c54SBarry Smith while (m-- > 0) { 2352f6d58c54SBarry Smith jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2353f6d58c54SBarry Smith } 2354f6d58c54SBarry Smith } 2355e32f2f54SBarry Smith if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt); 2356f6d58c54SBarry Smith 2357f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2358f6d58c54SBarry Smith Gather all column indices to all processors 2359f6d58c54SBarry Smith */ 2360f6d58c54SBarry Smith for (i=0; i<size; i++) { 
2361f6d58c54SBarry Smith recvcounts[i] = 0; 2362f6d58c54SBarry Smith for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) { 2363f6d58c54SBarry Smith recvcounts[i] += lens[j]; 2364f6d58c54SBarry Smith } 2365f6d58c54SBarry Smith } 2366f6d58c54SBarry Smith displs[0] = 0; 2367f6d58c54SBarry Smith for (i=1; i<size; i++) { 2368f6d58c54SBarry Smith displs[i] = displs[i-1] + recvcounts[i-1]; 2369f6d58c54SBarry Smith } 2370f6d58c54SBarry Smith #if defined(PETSC_HAVE_MPI_IN_PLACE) 2371ce94432eSBarry Smith ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2372f6d58c54SBarry Smith #else 2373ce94432eSBarry Smith ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2374f6d58c54SBarry Smith #endif 2375f6d58c54SBarry Smith /*-------------------------------------------------------------------- 2376f6d58c54SBarry Smith Assemble the matrix into useable form (note numerical values not yet set) 2377f6d58c54SBarry Smith */ 2378f6d58c54SBarry Smith /* set the b->ilen (length of each row) values */ 2379f6d58c54SBarry Smith ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr); 2380f6d58c54SBarry Smith /* set the b->i indices */ 2381f6d58c54SBarry Smith b->i[0] = 0; 2382f6d58c54SBarry Smith for (i=1; i<=A->rmap->N/bs; i++) { 2383f6d58c54SBarry Smith b->i[i] = b->i[i-1] + lens[i-1]; 2384f6d58c54SBarry Smith } 2385f6d58c54SBarry Smith ierr = PetscFree(lens);CHKERRQ(ierr); 2386f6d58c54SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387f6d58c54SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388f6d58c54SBarry Smith ierr = PetscFree(recvcounts);CHKERRQ(ierr); 2389f6d58c54SBarry Smith 2390f6d58c54SBarry Smith if (A->symmetric) { 2391f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2392f6d58c54SBarry Smith } else if 
(A->hermitian) { 2393f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr); 2394f6d58c54SBarry Smith } else if (A->structurally_symmetric) { 2395f6d58c54SBarry Smith ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 2396f6d58c54SBarry Smith } 2397f6d58c54SBarry Smith *newmat = B; 2398f6d58c54SBarry Smith PetscFunctionReturn(0); 2399f6d58c54SBarry Smith } 2400f6d58c54SBarry Smith 2401b1a666ecSBarry Smith #undef __FUNCT__ 2402b1a666ecSBarry Smith #define __FUNCT__ "MatSOR_MPIBAIJ" 2403b1a666ecSBarry Smith PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 2404b1a666ecSBarry Smith { 2405b1a666ecSBarry Smith Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data; 2406b1a666ecSBarry Smith PetscErrorCode ierr; 2407b1a666ecSBarry Smith Vec bb1 = 0; 2408b1a666ecSBarry Smith 2409b1a666ecSBarry Smith PetscFunctionBegin; 2410b1a666ecSBarry Smith if (flag == SOR_APPLY_UPPER) { 2411b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2412b1a666ecSBarry Smith PetscFunctionReturn(0); 2413b1a666ecSBarry Smith } 2414b1a666ecSBarry Smith 24154e980039SJed Brown if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { 24164e980039SJed Brown ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 24174e980039SJed Brown } 24184e980039SJed Brown 2419b1a666ecSBarry Smith if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2420b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2421b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2422b1a666ecSBarry Smith its--; 2423b1a666ecSBarry Smith } 2424b1a666ecSBarry Smith 2425b1a666ecSBarry Smith while (its--) { 2426b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2427b1a666ecSBarry Smith ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2428b1a666ecSBarry Smith 2429b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2430b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2431b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2432b1a666ecSBarry Smith 2433b1a666ecSBarry Smith /* local sweep */ 2434b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2435b1a666ecSBarry Smith } 2436b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2437b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2438b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2439b1a666ecSBarry Smith its--; 2440b1a666ecSBarry Smith } 2441b1a666ecSBarry Smith while (its--) { 2442b1a666ecSBarry Smith ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2443b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2444b1a666ecSBarry Smith 2445b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2446b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2447b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2448b1a666ecSBarry Smith 2449b1a666ecSBarry Smith /* local sweep */ 2450b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2451b1a666ecSBarry Smith } 2452b1a666ecSBarry Smith } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2453b1a666ecSBarry Smith if (flag & SOR_ZERO_INITIAL_GUESS) { 2454b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 2455b1a666ecSBarry Smith its--; 2456b1a666ecSBarry Smith } 2457b1a666ecSBarry Smith while (its--) { 2458b1a666ecSBarry Smith ierr = 
VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2459b1a666ecSBarry Smith ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2460b1a666ecSBarry Smith 2461b1a666ecSBarry Smith /* update rhs: bb1 = bb - B*x */ 2462b1a666ecSBarry Smith ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 2463b1a666ecSBarry Smith ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 2464b1a666ecSBarry Smith 2465b1a666ecSBarry Smith /* local sweep */ 2466b1a666ecSBarry Smith ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 2467b1a666ecSBarry Smith } 2468ce94432eSBarry Smith } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported"); 2469b1a666ecSBarry Smith 24706bf464f9SBarry Smith ierr = VecDestroy(&bb1);CHKERRQ(ierr); 2471b1a666ecSBarry Smith PetscFunctionReturn(0); 2472b1a666ecSBarry Smith } 2473b1a666ecSBarry Smith 2474bbead8a2SBarry Smith #undef __FUNCT__ 247547f7623dSRémi Lacroix #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ" 247647f7623dSRémi Lacroix PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms) 247747f7623dSRémi Lacroix { 247847f7623dSRémi Lacroix PetscErrorCode ierr; 247947f7623dSRémi Lacroix Mat_MPIBAIJ *aij = (Mat_MPIBAIJ*)A->data; 248047f7623dSRémi Lacroix PetscInt N,i,*garray = aij->garray; 248147f7623dSRémi Lacroix PetscInt ib,jb,bs = A->rmap->bs; 248247f7623dSRémi Lacroix Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ*) aij->A->data; 248347f7623dSRémi Lacroix MatScalar *a_val = a_aij->a; 248447f7623dSRémi Lacroix Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ*) aij->B->data; 248547f7623dSRémi Lacroix MatScalar *b_val = b_aij->a; 248647f7623dSRémi Lacroix PetscReal *work; 248747f7623dSRémi Lacroix 248847f7623dSRémi Lacroix PetscFunctionBegin; 248947f7623dSRémi Lacroix ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 24901795a4d1SJed Brown ierr = PetscCalloc1(N,&work);CHKERRQ(ierr); 
249147f7623dSRémi Lacroix if (type == NORM_2) { 249247f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 249347f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 249447f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 249547f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 249647f7623dSRémi Lacroix a_val++; 249747f7623dSRémi Lacroix } 249847f7623dSRémi Lacroix } 249947f7623dSRémi Lacroix } 250047f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 250147f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 250247f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 250347f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 250447f7623dSRémi Lacroix b_val++; 250547f7623dSRémi Lacroix } 250647f7623dSRémi Lacroix } 250747f7623dSRémi Lacroix } 250847f7623dSRémi Lacroix } else if (type == NORM_1) { 250947f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 251047f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 251147f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 251247f7623dSRémi Lacroix work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 251347f7623dSRémi Lacroix a_val++; 251447f7623dSRémi Lacroix } 251547f7623dSRémi Lacroix } 251647f7623dSRémi Lacroix } 251747f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 251847f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 251947f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 252047f7623dSRémi Lacroix work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 252147f7623dSRémi Lacroix b_val++; 252247f7623dSRémi Lacroix } 252347f7623dSRémi Lacroix } 252447f7623dSRémi Lacroix } 252547f7623dSRémi Lacroix } else if (type == NORM_INFINITY) { 252647f7623dSRémi Lacroix for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) { 252747f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 252847f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 252947f7623dSRémi 
Lacroix int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 253047f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 253147f7623dSRémi Lacroix a_val++; 253247f7623dSRémi Lacroix } 253347f7623dSRémi Lacroix } 253447f7623dSRémi Lacroix } 253547f7623dSRémi Lacroix for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) { 253647f7623dSRémi Lacroix for (jb=0; jb<bs; jb++) { 253747f7623dSRémi Lacroix for (ib=0; ib<bs; ib++) { 253847f7623dSRémi Lacroix int col = garray[b_aij->j[i]] * bs + jb; 253947f7623dSRémi Lacroix work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 254047f7623dSRémi Lacroix b_val++; 254147f7623dSRémi Lacroix } 254247f7623dSRémi Lacroix } 254347f7623dSRémi Lacroix } 254447f7623dSRémi Lacroix } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 254547f7623dSRémi Lacroix if (type == NORM_INFINITY) { 254647f7623dSRémi Lacroix ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 254747f7623dSRémi Lacroix } else { 254847f7623dSRémi Lacroix ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 254947f7623dSRémi Lacroix } 255047f7623dSRémi Lacroix ierr = PetscFree(work);CHKERRQ(ierr); 255147f7623dSRémi Lacroix if (type == NORM_2) { 255247f7623dSRémi Lacroix for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]); 255347f7623dSRémi Lacroix } 255447f7623dSRémi Lacroix PetscFunctionReturn(0); 255547f7623dSRémi Lacroix } 255647f7623dSRémi Lacroix 255747f7623dSRémi Lacroix #undef __FUNCT__ 2558bbead8a2SBarry Smith #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ" 2559713ccfa9SJed Brown PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values) 2560bbead8a2SBarry Smith { 2561bbead8a2SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; 2562bbead8a2SBarry Smith PetscErrorCode ierr; 2563bbead8a2SBarry Smith 2564bbead8a2SBarry Smith PetscFunctionBegin; 2565bbead8a2SBarry Smith ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2566bbead8a2SBarry Smith PetscFunctionReturn(0); 2567bbead8a2SBarry Smith } 2568bbead8a2SBarry Smith 25698c7482ecSBarry Smith 257079bdfe76SSatish Balay /* -------------------------------------------------------------------*/ 25713964eb88SJed Brown static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2572cc2dc46cSBarry Smith MatGetRow_MPIBAIJ, 2573cc2dc46cSBarry Smith MatRestoreRow_MPIBAIJ, 2574cc2dc46cSBarry Smith MatMult_MPIBAIJ, 257597304618SKris Buschelman /* 4*/ MatMultAdd_MPIBAIJ, 25767c922b88SBarry Smith MatMultTranspose_MPIBAIJ, 25777c922b88SBarry Smith MatMultTransposeAdd_MPIBAIJ, 2578cc2dc46cSBarry Smith 0, 2579cc2dc46cSBarry Smith 0, 2580cc2dc46cSBarry Smith 0, 258197304618SKris Buschelman /*10*/ 0, 2582cc2dc46cSBarry Smith 0, 2583cc2dc46cSBarry Smith 0, 2584b1a666ecSBarry Smith MatSOR_MPIBAIJ, 2585cc2dc46cSBarry Smith MatTranspose_MPIBAIJ, 258697304618SKris Buschelman /*15*/ MatGetInfo_MPIBAIJ, 25877fc3c18eSBarry Smith MatEqual_MPIBAIJ, 2588cc2dc46cSBarry Smith MatGetDiagonal_MPIBAIJ, 2589cc2dc46cSBarry Smith MatDiagonalScale_MPIBAIJ, 2590cc2dc46cSBarry Smith MatNorm_MPIBAIJ, 259197304618SKris Buschelman /*20*/ MatAssemblyBegin_MPIBAIJ, 2592cc2dc46cSBarry Smith MatAssemblyEnd_MPIBAIJ, 2593cc2dc46cSBarry Smith MatSetOption_MPIBAIJ, 2594cc2dc46cSBarry Smith MatZeroEntries_MPIBAIJ, 2595d519adbfSMatthew Knepley /*24*/ MatZeroRows_MPIBAIJ, 2596cc2dc46cSBarry Smith 0, 2597cc2dc46cSBarry Smith 0, 2598cc2dc46cSBarry Smith 0, 2599cc2dc46cSBarry Smith 0, 26004994cf47SJed Brown /*29*/ MatSetUp_MPIBAIJ, 2601273d9f13SBarry Smith 0, 2602cc2dc46cSBarry Smith 0, 2603cc2dc46cSBarry Smith 0, 2604cc2dc46cSBarry Smith 0, 2605d519adbfSMatthew Knepley /*34*/ MatDuplicate_MPIBAIJ, 2606cc2dc46cSBarry Smith 0, 2607cc2dc46cSBarry Smith 0, 2608cc2dc46cSBarry Smith 0, 2609cc2dc46cSBarry Smith 0, 2610d519adbfSMatthew Knepley /*39*/ MatAXPY_MPIBAIJ, 2611cc2dc46cSBarry Smith MatGetSubMatrices_MPIBAIJ, 2612cc2dc46cSBarry Smith 
MatIncreaseOverlap_MPIBAIJ, 2613cc2dc46cSBarry Smith MatGetValues_MPIBAIJ, 26143c896bc6SHong Zhang MatCopy_MPIBAIJ, 2615d519adbfSMatthew Knepley /*44*/ 0, 2616cc2dc46cSBarry Smith MatScale_MPIBAIJ, 2617cc2dc46cSBarry Smith 0, 2618cc2dc46cSBarry Smith 0, 26196f0a72daSMatthew G. Knepley MatZeroRowsColumns_MPIBAIJ, 2620f73d5cc4SBarry Smith /*49*/ 0, 2621cc2dc46cSBarry Smith 0, 2622cc2dc46cSBarry Smith 0, 2623cc2dc46cSBarry Smith 0, 2624cc2dc46cSBarry Smith 0, 262593dfae19SHong Zhang /*54*/ MatFDColoringCreate_MPIXAIJ, 2626cc2dc46cSBarry Smith 0, 2627cc2dc46cSBarry Smith MatSetUnfactored_MPIBAIJ, 262882094794SBarry Smith MatPermute_MPIBAIJ, 2629cc2dc46cSBarry Smith MatSetValuesBlocked_MPIBAIJ, 2630d519adbfSMatthew Knepley /*59*/ MatGetSubMatrix_MPIBAIJ, 2631f14a1c24SBarry Smith MatDestroy_MPIBAIJ, 2632f14a1c24SBarry Smith MatView_MPIBAIJ, 2633357abbc8SBarry Smith 0, 26347843d17aSBarry Smith 0, 2635d519adbfSMatthew Knepley /*64*/ 0, 26367843d17aSBarry Smith 0, 26377843d17aSBarry Smith 0, 26387843d17aSBarry Smith 0, 26397843d17aSBarry Smith 0, 2640d519adbfSMatthew Knepley /*69*/ MatGetRowMaxAbs_MPIBAIJ, 26417843d17aSBarry Smith 0, 264297304618SKris Buschelman 0, 264397304618SKris Buschelman 0, 264497304618SKris Buschelman 0, 2645d519adbfSMatthew Knepley /*74*/ 0, 2646f6d58c54SBarry Smith MatFDColoringApply_BAIJ, 264797304618SKris Buschelman 0, 264897304618SKris Buschelman 0, 264997304618SKris Buschelman 0, 2650d519adbfSMatthew Knepley /*79*/ 0, 265197304618SKris Buschelman 0, 265297304618SKris Buschelman 0, 265397304618SKris Buschelman 0, 26545bba2384SShri Abhyankar MatLoad_MPIBAIJ, 2655d519adbfSMatthew Knepley /*84*/ 0, 2656865e5f61SKris Buschelman 0, 2657865e5f61SKris Buschelman 0, 2658865e5f61SKris Buschelman 0, 2659865e5f61SKris Buschelman 0, 2660d519adbfSMatthew Knepley /*89*/ 0, 2661865e5f61SKris Buschelman 0, 2662865e5f61SKris Buschelman 0, 2663865e5f61SKris Buschelman 0, 2664865e5f61SKris Buschelman 0, 2665d519adbfSMatthew Knepley /*94*/ 0, 2666865e5f61SKris 
Buschelman 0, 2667865e5f61SKris Buschelman 0, 266899cafbc1SBarry Smith 0, 266999cafbc1SBarry Smith 0, 2670d519adbfSMatthew Knepley /*99*/ 0, 267199cafbc1SBarry Smith 0, 267299cafbc1SBarry Smith 0, 267399cafbc1SBarry Smith 0, 267499cafbc1SBarry Smith 0, 2675d519adbfSMatthew Knepley /*104*/0, 267699cafbc1SBarry Smith MatRealPart_MPIBAIJ, 26778c7482ecSBarry Smith MatImaginaryPart_MPIBAIJ, 26788c7482ecSBarry Smith 0, 26798c7482ecSBarry Smith 0, 2680d519adbfSMatthew Knepley /*109*/0, 26818c7482ecSBarry Smith 0, 26828c7482ecSBarry Smith 0, 26838c7482ecSBarry Smith 0, 26848c7482ecSBarry Smith 0, 2685d1adec66SJed Brown /*114*/MatGetSeqNonzeroStructure_MPIBAIJ, 26868c7482ecSBarry Smith 0, 26874683f7a4SShri Abhyankar MatGetGhosts_MPIBAIJ, 26884683f7a4SShri Abhyankar 0, 26894683f7a4SShri Abhyankar 0, 26904683f7a4SShri Abhyankar /*119*/0, 26914683f7a4SShri Abhyankar 0, 26924683f7a4SShri Abhyankar 0, 2693bbead8a2SBarry Smith 0, 2694e8271787SHong Zhang MatGetMultiProcBlock_MPIBAIJ, 2695bbead8a2SBarry Smith /*124*/0, 269647f7623dSRémi Lacroix MatGetColumnNorms_MPIBAIJ, 26973964eb88SJed Brown MatInvertBlockDiagonal_MPIBAIJ, 26983964eb88SJed Brown 0, 26993964eb88SJed Brown 0, 27003964eb88SJed Brown /*129*/ 0, 27013964eb88SJed Brown 0, 27023964eb88SJed Brown 0, 27033964eb88SJed Brown 0, 27043964eb88SJed Brown 0, 27053964eb88SJed Brown /*134*/ 0, 27063964eb88SJed Brown 0, 27073964eb88SJed Brown 0, 27083964eb88SJed Brown 0, 27093964eb88SJed Brown 0, 27103964eb88SJed Brown /*139*/ 0, 2711f9426fe0SMark Adams 0, 27121919a2e2SJed Brown 0, 2713bdf6f3fcSHong Zhang MatFDColoringSetUp_MPIXAIJ, 2714bdf6f3fcSHong Zhang 0, 2715bdf6f3fcSHong Zhang /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ 27168c7482ecSBarry Smith }; 271779bdfe76SSatish Balay 27184a2ae208SSatish Balay #undef __FUNCT__ 27194a2ae208SSatish Balay #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ" 272011bd1e4dSLisandro Dalcin PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a) 27215ef9f2a5SBarry Smith { 27225ef9f2a5SBarry 
Smith PetscFunctionBegin; 27235ef9f2a5SBarry Smith *a = ((Mat_MPIBAIJ*)A->data)->A; 27245ef9f2a5SBarry Smith PetscFunctionReturn(0); 27255ef9f2a5SBarry Smith } 272679bdfe76SSatish Balay 27278cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*); 2728d94109b8SHong Zhang 2729aac34f13SBarry Smith #undef __FUNCT__ 2730aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ" 2731cf12db73SBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2732aac34f13SBarry Smith { 2733b8d659d7SLisandro Dalcin PetscInt m,rstart,cstart,cend; 2734b8d659d7SLisandro Dalcin PetscInt i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0; 2735b8d659d7SLisandro Dalcin const PetscInt *JJ =0; 2736b8d659d7SLisandro Dalcin PetscScalar *values=0; 2737d47bf9aaSJed Brown PetscBool roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented; 2738aac34f13SBarry Smith PetscErrorCode ierr; 2739aac34f13SBarry Smith 2740aac34f13SBarry Smith PetscFunctionBegin; 274126283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 274226283091SBarry Smith ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 274326283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 274426283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2745e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2746d0f46423SBarry Smith m = B->rmap->n/bs; 2747d0f46423SBarry Smith rstart = B->rmap->rstart/bs; 2748d0f46423SBarry Smith cstart = B->cmap->rstart/bs; 2749d0f46423SBarry Smith cend = B->cmap->rend/bs; 2750b8d659d7SLisandro Dalcin 2751e32f2f54SBarry Smith if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]); 2752dcca6d9dSJed Brown ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 2753aac34f13SBarry Smith for (i=0; i<m; i++) { 2754cf12db73SBarry Smith nz = ii[i+1] - ii[i]; 
2755e32f2f54SBarry Smith if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz); 2756b8d659d7SLisandro Dalcin nz_max = PetscMax(nz_max,nz); 2757cf12db73SBarry Smith JJ = jj + ii[i]; 2758b8d659d7SLisandro Dalcin for (j=0; j<nz; j++) { 2759aac34f13SBarry Smith if (*JJ >= cstart) break; 2760aac34f13SBarry Smith JJ++; 2761aac34f13SBarry Smith } 2762aac34f13SBarry Smith d = 0; 2763b8d659d7SLisandro Dalcin for (; j<nz; j++) { 2764aac34f13SBarry Smith if (*JJ++ >= cend) break; 2765aac34f13SBarry Smith d++; 2766aac34f13SBarry Smith } 2767aac34f13SBarry Smith d_nnz[i] = d; 2768b8d659d7SLisandro Dalcin o_nnz[i] = nz - d; 2769aac34f13SBarry Smith } 2770aac34f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2771fca92195SBarry Smith ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 2772aac34f13SBarry Smith 2773b8d659d7SLisandro Dalcin values = (PetscScalar*)V; 2774b8d659d7SLisandro Dalcin if (!values) { 2775785e854fSJed Brown ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr); 2776b8d659d7SLisandro Dalcin ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr); 2777b8d659d7SLisandro Dalcin } 2778b8d659d7SLisandro Dalcin for (i=0; i<m; i++) { 2779b8d659d7SLisandro Dalcin PetscInt row = i + rstart; 2780cf12db73SBarry Smith PetscInt ncols = ii[i+1] - ii[i]; 2781cf12db73SBarry Smith const PetscInt *icols = jj + ii[i]; 27823adadaf3SJed Brown if (!roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2783cf12db73SBarry Smith const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2784b8d659d7SLisandro Dalcin ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 27853adadaf3SJed Brown } else { /* block ordering does not match so we can only insert one block at a time. 
*/ 27863adadaf3SJed Brown PetscInt j; 27873adadaf3SJed Brown for (j=0; j<ncols; j++) { 27883adadaf3SJed Brown const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 27893adadaf3SJed Brown ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 27903adadaf3SJed Brown } 27913adadaf3SJed Brown } 2792aac34f13SBarry Smith } 2793aac34f13SBarry Smith 2794b8d659d7SLisandro Dalcin if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 2795aac34f13SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2796aac34f13SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 27977827cd58SJed Brown ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2798aac34f13SBarry Smith PetscFunctionReturn(0); 2799aac34f13SBarry Smith } 2800aac34f13SBarry Smith 2801aac34f13SBarry Smith #undef __FUNCT__ 2802aac34f13SBarry Smith #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR" 2803aac34f13SBarry Smith /*@C 2804dfb205c3SBarry Smith MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format 2805aac34f13SBarry Smith (the default parallel PETSc format). 2806aac34f13SBarry Smith 2807aac34f13SBarry Smith Collective on MPI_Comm 2808aac34f13SBarry Smith 2809aac34f13SBarry Smith Input Parameters: 28101c4f3114SJed Brown + B - the matrix 2811dfb205c3SBarry Smith . bs - the block size 2812aac34f13SBarry Smith . i - the indices into j for the start of each local row (starts with zero) 2813aac34f13SBarry Smith . j - the column indices for each local row (starts with zero) these must be sorted for each row 2814aac34f13SBarry Smith - v - optional values in the matrix 2815aac34f13SBarry Smith 2816aac34f13SBarry Smith Level: developer 2817aac34f13SBarry Smith 28183adadaf3SJed Brown Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. 
For example, C programs 28193adadaf3SJed Brown may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 28203adadaf3SJed Brown over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 28213adadaf3SJed Brown MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 28223adadaf3SJed Brown block column and the second index is over columns within a block. 28233adadaf3SJed Brown 2824aac34f13SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 2825aac34f13SBarry Smith 28263adadaf3SJed Brown .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ 2827aac34f13SBarry Smith @*/ 28287087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 2829aac34f13SBarry Smith { 28304ac538c5SBarry Smith PetscErrorCode ierr; 2831aac34f13SBarry Smith 2832aac34f13SBarry Smith PetscFunctionBegin; 28336ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 28346ba663aaSJed Brown PetscValidType(B,1); 28356ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 28364ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 2837aac34f13SBarry Smith PetscFunctionReturn(0); 2838aac34f13SBarry Smith } 2839aac34f13SBarry Smith 28404a2ae208SSatish Balay #undef __FUNCT__ 2841a23d5eceSKris Buschelman #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ" 2842b2573a8aSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz) 2843a23d5eceSKris Buschelman { 2844a23d5eceSKris Buschelman Mat_MPIBAIJ *b; 2845dfbe8321SBarry Smith 
PetscErrorCode ierr; 2846535b19f3SBarry Smith PetscInt i; 2847a23d5eceSKris Buschelman 2848a23d5eceSKris Buschelman PetscFunctionBegin; 284933d57670SJed Brown ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 285026283091SBarry Smith ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 285126283091SBarry Smith ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2852e02043d6SBarry Smith ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2853899cda47SBarry Smith 2854a23d5eceSKris Buschelman if (d_nnz) { 2855d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2856e32f2f54SBarry Smith if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]); 2857a23d5eceSKris Buschelman } 2858a23d5eceSKris Buschelman } 2859a23d5eceSKris Buschelman if (o_nnz) { 2860d0f46423SBarry Smith for (i=0; i<B->rmap->n/bs; i++) { 2861e32f2f54SBarry Smith if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]); 2862a23d5eceSKris Buschelman } 2863a23d5eceSKris Buschelman } 2864a23d5eceSKris Buschelman 2865a23d5eceSKris Buschelman b = (Mat_MPIBAIJ*)B->data; 2866a23d5eceSKris Buschelman b->bs2 = bs*bs; 2867d0f46423SBarry Smith b->mbs = B->rmap->n/bs; 2868d0f46423SBarry Smith b->nbs = B->cmap->n/bs; 2869d0f46423SBarry Smith b->Mbs = B->rmap->N/bs; 2870d0f46423SBarry Smith b->Nbs = B->cmap->N/bs; 2871a23d5eceSKris Buschelman 2872a23d5eceSKris Buschelman for (i=0; i<=b->size; i++) { 2873d0f46423SBarry Smith b->rangebs[i] = B->rmap->range[i]/bs; 2874a23d5eceSKris Buschelman } 2875d0f46423SBarry Smith b->rstartbs = B->rmap->rstart/bs; 2876d0f46423SBarry Smith b->rendbs = B->rmap->rend/bs; 2877d0f46423SBarry Smith b->cstartbs = B->cmap->rstart/bs; 2878d0f46423SBarry Smith b->cendbs = B->cmap->rend/bs; 2879a23d5eceSKris Buschelman 2880526dfc15SBarry Smith if (!B->preallocated) { 2881f69a0ea3SMatthew Knepley ierr = 
MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2882d0f46423SBarry Smith ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 28839c097c71SKris Buschelman ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr); 28843bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2885f69a0ea3SMatthew Knepley ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2886d0f46423SBarry Smith ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 28879c097c71SKris Buschelman ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr); 28883bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2889ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr); 2890526dfc15SBarry Smith } 2891a23d5eceSKris Buschelman 2892526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr); 2893526dfc15SBarry Smith ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr); 2894526dfc15SBarry Smith B->preallocated = PETSC_TRUE; 2895a23d5eceSKris Buschelman PetscFunctionReturn(0); 2896a23d5eceSKris Buschelman } 2897a23d5eceSKris Buschelman 28987087cfbeSBarry Smith extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec); 28997087cfbeSBarry Smith extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal); 29005bf65638SKris Buschelman 290182094794SBarry Smith #undef __FUNCT__ 290282094794SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj" 29038cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj) 290482094794SBarry Smith { 290582094794SBarry Smith Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)B->data; 290682094794SBarry Smith PetscErrorCode ierr; 290782094794SBarry Smith Mat_SeqBAIJ *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data; 290882094794SBarry Smith PetscInt M = 
B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs; 290982094794SBarry Smith const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 291082094794SBarry Smith 291182094794SBarry Smith PetscFunctionBegin; 2912854ce69bSBarry Smith ierr = PetscMalloc1(M+1,&ii);CHKERRQ(ierr); 291382094794SBarry Smith ii[0] = 0; 291482094794SBarry Smith for (i=0; i<M; i++) { 2915e32f2f54SBarry Smith if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]); 2916e32f2f54SBarry Smith if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]); 291782094794SBarry Smith ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i]; 29185ee9ba1cSJed Brown /* remove one from count of matrix has diagonal */ 29195ee9ba1cSJed Brown for (j=id[i]; j<id[i+1]; j++) { 29205ee9ba1cSJed Brown if (jd[j] == i) {ii[i+1]--;break;} 29215ee9ba1cSJed Brown } 292282094794SBarry Smith } 2923785e854fSJed Brown ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr); 292482094794SBarry Smith cnt = 0; 292582094794SBarry Smith for (i=0; i<M; i++) { 292682094794SBarry Smith for (j=io[i]; j<io[i+1]; j++) { 292782094794SBarry Smith if (garray[jo[j]] > rstart) break; 292882094794SBarry Smith jj[cnt++] = garray[jo[j]]; 292982094794SBarry Smith } 293082094794SBarry Smith for (k=id[i]; k<id[i+1]; k++) { 29315ee9ba1cSJed Brown if (jd[k] != i) { 293282094794SBarry Smith jj[cnt++] = rstart + jd[k]; 293382094794SBarry Smith } 29345ee9ba1cSJed Brown } 293582094794SBarry Smith for (; j<io[i+1]; j++) { 293682094794SBarry Smith jj[cnt++] = garray[jo[j]]; 293782094794SBarry Smith } 293882094794SBarry Smith } 2939ce94432eSBarry Smith ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr); 294082094794SBarry Smith PetscFunctionReturn(0); 294182094794SBarry Smith } 294282094794SBarry Smith 2943c6db04a5SJed Brown #include 
<../src/mat/impls/aij/mpi/mpiaij.h> 294462471d69SBarry Smith 29458cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*); 2946b2573a8aSBarry Smith 294762471d69SBarry Smith #undef __FUNCT__ 294862471d69SBarry Smith #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ" 29498cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 295062471d69SBarry Smith { 295162471d69SBarry Smith PetscErrorCode ierr; 295262471d69SBarry Smith Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 295362471d69SBarry Smith Mat B; 295485a69837SSatish Balay Mat_MPIAIJ *b; 295562471d69SBarry Smith 295662471d69SBarry Smith PetscFunctionBegin; 2957ce94432eSBarry Smith if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled"); 295862471d69SBarry Smith 2959ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 29606d0a4a0eSHong Zhang ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2961f090d951SRémi Lacroix ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2962f090d951SRémi Lacroix ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 29630298fd71SBarry Smith ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 29640298fd71SBarry Smith ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr); 296562471d69SBarry Smith b = (Mat_MPIAIJ*) B->data; 296662471d69SBarry Smith 29676bf464f9SBarry Smith ierr = MatDestroy(&b->A);CHKERRQ(ierr); 29686bf464f9SBarry Smith ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2969ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr); 297062471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr); 297162471d69SBarry Smith ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr); 297262471d69SBarry Smith ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 
297362471d69SBarry Smith ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29746a719282SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 29756a719282SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 297662471d69SBarry Smith if (reuse == MAT_REUSE_MATRIX) { 297762471d69SBarry Smith ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 297862471d69SBarry Smith } else { 297962471d69SBarry Smith *newmat = B; 298062471d69SBarry Smith } 298162471d69SBarry Smith PetscFunctionReturn(0); 298262471d69SBarry Smith } 298362471d69SBarry Smith 29840bad9183SKris Buschelman /*MC 2985fafad747SKris Buschelman MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 29860bad9183SKris Buschelman 29870bad9183SKris Buschelman Options Database Keys: 29888c07d4e3SBarry Smith + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions() 29898c07d4e3SBarry Smith . -mat_block_size <bs> - set the blocksize used to store the matrix 29908c07d4e3SBarry Smith - -mat_use_hash_table <fact> 29910bad9183SKris Buschelman 29920bad9183SKris Buschelman Level: beginner 29930bad9183SKris Buschelman 29940bad9183SKris Buschelman .seealso: MatCreateMPIBAIJ 29950bad9183SKris Buschelman M*/ 29960bad9183SKris Buschelman 29978cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*); 2998c0cdd4a1SDahai Guo 2999a23d5eceSKris Buschelman #undef __FUNCT__ 30004a2ae208SSatish Balay #define __FUNCT__ "MatCreate_MPIBAIJ" 30018cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 3002273d9f13SBarry Smith { 3003273d9f13SBarry Smith Mat_MPIBAIJ *b; 3004dfbe8321SBarry Smith PetscErrorCode ierr; 300594ae4db5SBarry Smith PetscBool flg = PETSC_FALSE; 3006273d9f13SBarry Smith 3007273d9f13SBarry Smith PetscFunctionBegin; 3008b00a9115SJed Brown ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 300982502324SSatish Balay B->data = (void*)b; 301082502324SSatish 
Balay 3011273d9f13SBarry Smith ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3012273d9f13SBarry Smith B->assembled = PETSC_FALSE; 3013273d9f13SBarry Smith 3014273d9f13SBarry Smith B->insertmode = NOT_SET_VALUES; 3015ce94432eSBarry Smith ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 3016ce94432eSBarry Smith ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr); 3017273d9f13SBarry Smith 3018273d9f13SBarry Smith /* build local table of row and column ownerships */ 3019854ce69bSBarry Smith ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr); 3020273d9f13SBarry Smith 3021273d9f13SBarry Smith /* build cache for off array entries formed */ 3022ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 302326fbe8dcSKarl Rupp 3024273d9f13SBarry Smith b->donotstash = PETSC_FALSE; 30250298fd71SBarry Smith b->colmap = NULL; 30260298fd71SBarry Smith b->garray = NULL; 3027273d9f13SBarry Smith b->roworiented = PETSC_TRUE; 3028273d9f13SBarry Smith 3029273d9f13SBarry Smith /* stuff used in block assembly */ 3030273d9f13SBarry Smith b->barray = 0; 3031273d9f13SBarry Smith 3032273d9f13SBarry Smith /* stuff used for matrix vector multiply */ 3033273d9f13SBarry Smith b->lvec = 0; 3034273d9f13SBarry Smith b->Mvctx = 0; 3035273d9f13SBarry Smith 3036273d9f13SBarry Smith /* stuff for MatGetRow() */ 3037273d9f13SBarry Smith b->rowindices = 0; 3038273d9f13SBarry Smith b->rowvalues = 0; 3039273d9f13SBarry Smith b->getrowactive = PETSC_FALSE; 3040273d9f13SBarry Smith 3041273d9f13SBarry Smith /* hash table stuff */ 3042273d9f13SBarry Smith b->ht = 0; 3043273d9f13SBarry Smith b->hd = 0; 3044273d9f13SBarry Smith b->ht_size = 0; 3045273d9f13SBarry Smith b->ht_flag = PETSC_FALSE; 3046273d9f13SBarry Smith b->ht_fact = 0; 3047273d9f13SBarry Smith b->ht_total_ct = 0; 3048273d9f13SBarry Smith b->ht_insert_ct = 0; 3049273d9f13SBarry Smith 30507a868f3eSHong 
Zhang /* stuff for MatGetSubMatrices_MPIBAIJ_local() */ 30517a868f3eSHong Zhang b->ijonly = PETSC_FALSE; 30527a868f3eSHong Zhang 30538c07d4e3SBarry Smith 3054bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr); 3055bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr); 3056bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr); 3057bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr); 3058bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr); 3059bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr); 3060bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr); 3061bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr); 3062bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr); 3063bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr); 3064bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr); 306517667f90SBarry Smith ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr); 306694ae4db5SBarry Smith 306794ae4db5SBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for 
loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr); 306894ae4db5SBarry Smith ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr); 306994ae4db5SBarry Smith if (flg) { 307094ae4db5SBarry Smith PetscReal fact = 1.39; 307194ae4db5SBarry Smith ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr); 307294ae4db5SBarry Smith ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr); 307394ae4db5SBarry Smith if (fact <= 1.0) fact = 1.39; 307494ae4db5SBarry Smith ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr); 307594ae4db5SBarry Smith ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr); 307694ae4db5SBarry Smith } 307794ae4db5SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 3078273d9f13SBarry Smith PetscFunctionReturn(0); 3079273d9f13SBarry Smith } 3080273d9f13SBarry Smith 3081209238afSKris Buschelman /*MC 3082002d173eSKris Buschelman MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 3083209238afSKris Buschelman 3084209238afSKris Buschelman This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator, 3085209238afSKris Buschelman and MATMPIBAIJ otherwise. 3086209238afSKris Buschelman 3087209238afSKris Buschelman Options Database Keys: 3088209238afSKris Buschelman . 
-mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions() 3089209238afSKris Buschelman 3090209238afSKris Buschelman Level: beginner 3091209238afSKris Buschelman 309269b1f4b7SBarry Smith .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 3093209238afSKris Buschelman M*/ 3094209238afSKris Buschelman 30954a2ae208SSatish Balay #undef __FUNCT__ 30964a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetPreallocation" 3097273d9f13SBarry Smith /*@C 3098aac34f13SBarry Smith MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format 3099273d9f13SBarry Smith (block compressed row). For good matrix assembly performance 3100273d9f13SBarry Smith the user should preallocate the matrix storage by setting the parameters 3101273d9f13SBarry Smith d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3102273d9f13SBarry Smith performance can be increased by more than a factor of 50. 3103273d9f13SBarry Smith 3104273d9f13SBarry Smith Collective on Mat 3105273d9f13SBarry Smith 3106273d9f13SBarry Smith Input Parameters: 31071c4f3114SJed Brown + B - the matrix 3108bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3109bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3110273d9f13SBarry Smith . d_nz - number of block nonzeros per block row in diagonal portion of local 3111273d9f13SBarry Smith submatrix (same for all local rows) 3112273d9f13SBarry Smith . d_nnz - array containing the number of block nonzeros in the various block rows 3113273d9f13SBarry Smith of the in diagonal portion of the local (possibly different for each block 31140298fd71SBarry Smith row) or NULL. 
If you plan to factor the matrix you must leave room for the diagonal entry and 311595742e49SBarry Smith set it even if it is zero. 3116273d9f13SBarry Smith . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 3117273d9f13SBarry Smith submatrix (same for all local rows). 3118273d9f13SBarry Smith - o_nnz - array containing the number of block nonzeros in the various block rows of the 3119273d9f13SBarry Smith off-diagonal portion of the local submatrix (possibly different for 31200298fd71SBarry Smith each block row) or NULL. 3121273d9f13SBarry Smith 312249a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 3123273d9f13SBarry Smith 3124273d9f13SBarry Smith Options Database Keys: 31258c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 31268c07d4e3SBarry Smith - -mat_use_hash_table <fact> 3127273d9f13SBarry Smith 3128273d9f13SBarry Smith Notes: 3129273d9f13SBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3130273d9f13SBarry Smith then it must be used on all processors that share the object for that argument. 3131273d9f13SBarry Smith 3132273d9f13SBarry Smith Storage Information: 3133273d9f13SBarry Smith For a square global matrix we define each processor's diagonal portion 3134273d9f13SBarry Smith to be its local rows and the corresponding columns (a square submatrix); 3135273d9f13SBarry Smith each processor's off-diagonal portion encompasses the remainder of the 3136273d9f13SBarry Smith local matrix (a rectangular submatrix). 3137273d9f13SBarry Smith 3138273d9f13SBarry Smith The user can specify preallocated storage for the diagonal part of 3139273d9f13SBarry Smith the local submatrix with either d_nz or d_nnz (not both). Set 31400298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 3141273d9f13SBarry Smith memory allocation. 
Likewise, specify preallocated storage for the 3142273d9f13SBarry Smith off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 3143273d9f13SBarry Smith 3144273d9f13SBarry Smith Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3145273d9f13SBarry Smith the figure below we depict these three local rows and all columns (0-11). 3146273d9f13SBarry Smith 3147273d9f13SBarry Smith .vb 3148273d9f13SBarry Smith 0 1 2 3 4 5 6 7 8 9 10 11 3149a4b1a0f6SJed Brown -------------------------- 3150273d9f13SBarry Smith row 3 |o o o d d d o o o o o o 3151273d9f13SBarry Smith row 4 |o o o d d d o o o o o o 3152273d9f13SBarry Smith row 5 |o o o d d d o o o o o o 3153a4b1a0f6SJed Brown -------------------------- 3154273d9f13SBarry Smith .ve 3155273d9f13SBarry Smith 3156273d9f13SBarry Smith Thus, any entries in the d locations are stored in the d (diagonal) 3157273d9f13SBarry Smith submatrix, and any entries in the o locations are stored in the 3158273d9f13SBarry Smith o (off-diagonal) submatrix. Note that the d and the o submatrices are 3159273d9f13SBarry Smith stored simply in the MATSEQBAIJ format for compressed row storage. 3160273d9f13SBarry Smith 3161273d9f13SBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3162273d9f13SBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 3163273d9f13SBarry Smith In general, for PDE problems in which most nonzeros are near the diagonal, 3164273d9f13SBarry Smith one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3165273d9f13SBarry Smith or you will get TERRIBLE performance; see the users' manual chapter on 3166273d9f13SBarry Smith matrices. 
3167273d9f13SBarry Smith 3168aa95bbe8SBarry Smith You can call MatGetInfo() to get information on how effective the preallocation was; 3169aa95bbe8SBarry Smith for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3170aa95bbe8SBarry Smith You can also run with the option -info and look for messages with the string 3171aa95bbe8SBarry Smith malloc in them to see if additional memory allocation was needed. 3172aa95bbe8SBarry Smith 3173273d9f13SBarry Smith Level: intermediate 3174273d9f13SBarry Smith 3175273d9f13SBarry Smith .keywords: matrix, block, aij, compressed row, sparse, parallel 3176273d9f13SBarry Smith 3177ab978733SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership() 3178273d9f13SBarry Smith @*/ 31797087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3180273d9f13SBarry Smith { 31814ac538c5SBarry Smith PetscErrorCode ierr; 3182273d9f13SBarry Smith 3183273d9f13SBarry Smith PetscFunctionBegin; 31846ba663aaSJed Brown PetscValidHeaderSpecific(B,MAT_CLASSID,1); 31856ba663aaSJed Brown PetscValidType(B,1); 31866ba663aaSJed Brown PetscValidLogicalCollectiveInt(B,bs,2); 31874ac538c5SBarry Smith ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3188273d9f13SBarry Smith PetscFunctionReturn(0); 3189273d9f13SBarry Smith } 3190273d9f13SBarry Smith 31914a2ae208SSatish Balay #undef __FUNCT__ 319269b1f4b7SBarry Smith #define __FUNCT__ "MatCreateBAIJ" 319379bdfe76SSatish Balay /*@C 319469b1f4b7SBarry Smith MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format 319579bdfe76SSatish Balay (block compressed row). 
For good matrix assembly performance 319679bdfe76SSatish Balay the user should preallocate the matrix storage by setting the parameters 319779bdfe76SSatish Balay d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 319879bdfe76SSatish Balay performance can be increased by more than a factor of 50. 319979bdfe76SSatish Balay 3200db81eaa0SLois Curfman McInnes Collective on MPI_Comm 3201db81eaa0SLois Curfman McInnes 320279bdfe76SSatish Balay Input Parameters: 3203db81eaa0SLois Curfman McInnes + comm - MPI communicator 3204bb7ae925SBarry Smith . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3205bb7ae925SBarry Smith blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 320679bdfe76SSatish Balay . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 320792e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 320892e8d321SLois Curfman McInnes y vector for the matrix-vector product y = Ax. 320992e8d321SLois Curfman McInnes . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 321092e8d321SLois Curfman McInnes This value should be the same as the local size used in creating the 321192e8d321SLois Curfman McInnes x vector for the matrix-vector product y = Ax. 3212be79a94dSBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3213be79a94dSBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 321447a75d0bSBarry Smith . d_nz - number of nonzero blocks per block row in diagonal portion of local 321579bdfe76SSatish Balay submatrix (same for all local rows) 321647a75d0bSBarry Smith . 
d_nnz - array containing the number of nonzero blocks in the various block rows 321792e8d321SLois Curfman McInnes of the diagonal portion of the local submatrix (possibly different for each block 32180298fd71SBarry Smith row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 321995742e49SBarry Smith and set it even if it is zero. 322047a75d0bSBarry Smith . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 322179bdfe76SSatish Balay submatrix (same for all local rows). 322247a75d0bSBarry Smith - o_nnz - array containing the number of nonzero blocks in the various block rows of the 322392e8d321SLois Curfman McInnes off-diagonal portion of the local submatrix (possibly different for 32240298fd71SBarry Smith each block row) or NULL. 322579bdfe76SSatish Balay 322679bdfe76SSatish Balay Output Parameter: 322779bdfe76SSatish Balay . A - the matrix 322879bdfe76SSatish Balay 3229db81eaa0SLois Curfman McInnes Options Database Keys: 32308c07d4e3SBarry Smith + -mat_block_size - size of the blocks to use 32318c07d4e3SBarry Smith - -mat_use_hash_table <fact> 32323ffaccefSLois Curfman McInnes 3233175b88e8SBarry Smith It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3234ae1d86c5SBarry Smith MatXXXXSetPreallocation() paradigm instead of this routine directly. 3235175b88e8SBarry Smith [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3236175b88e8SBarry Smith 3237b259b22eSLois Curfman McInnes Notes: 323849a6f317SBarry Smith If the *_nnz parameter is given then the *_nz parameter is ignored 323949a6f317SBarry Smith 324047a75d0bSBarry Smith A nonzero block is any block that has 1 or more nonzeros in it 324147a75d0bSBarry Smith 324279bdfe76SSatish Balay The user MUST specify either the local or global matrix dimensions 324379bdfe76SSatish Balay (possibly both). 
324479bdfe76SSatish Balay 3245be79a94dSBarry Smith If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor 3246be79a94dSBarry Smith then it must be used on all processors that share the object for that argument. 3247be79a94dSBarry Smith 324879bdfe76SSatish Balay Storage Information: 324979bdfe76SSatish Balay For a square global matrix we define each processor's diagonal portion 325079bdfe76SSatish Balay to be its local rows and the corresponding columns (a square submatrix); 325179bdfe76SSatish Balay each processor's off-diagonal portion encompasses the remainder of the 325279bdfe76SSatish Balay local matrix (a rectangular submatrix). 325379bdfe76SSatish Balay 325479bdfe76SSatish Balay The user can specify preallocated storage for the diagonal part of 325579bdfe76SSatish Balay the local submatrix with either d_nz or d_nnz (not both). Set 32560298fd71SBarry Smith d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic 325779bdfe76SSatish Balay memory allocation. Likewise, specify preallocated storage for the 325879bdfe76SSatish Balay off-diagonal part of the local submatrix with o_nz or o_nnz (not both). 325979bdfe76SSatish Balay 326079bdfe76SSatish Balay Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 326179bdfe76SSatish Balay the figure below we depict these three local rows and all columns (0-11). 
326279bdfe76SSatish Balay 3263db81eaa0SLois Curfman McInnes .vb 3264db81eaa0SLois Curfman McInnes 0 1 2 3 4 5 6 7 8 9 10 11 3265a4b1a0f6SJed Brown -------------------------- 3266db81eaa0SLois Curfman McInnes row 3 |o o o d d d o o o o o o 3267db81eaa0SLois Curfman McInnes row 4 |o o o d d d o o o o o o 3268db81eaa0SLois Curfman McInnes row 5 |o o o d d d o o o o o o 3269a4b1a0f6SJed Brown -------------------------- 3270db81eaa0SLois Curfman McInnes .ve 327179bdfe76SSatish Balay 327279bdfe76SSatish Balay Thus, any entries in the d locations are stored in the d (diagonal) 327379bdfe76SSatish Balay submatrix, and any entries in the o locations are stored in the 327479bdfe76SSatish Balay o (off-diagonal) submatrix. Note that the d and the o submatrices are 327557b952d6SSatish Balay stored simply in the MATSEQBAIJ format for compressed row storage. 327679bdfe76SSatish Balay 3277d64ed03dSBarry Smith Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3278d64ed03dSBarry Smith and o_nz should indicate the number of block nonzeros per row in the o matrix. 327979bdfe76SSatish Balay In general, for PDE problems in which most nonzeros are near the diagonal, 328092e8d321SLois Curfman McInnes one expects d_nz >> o_nz. For large problems you MUST preallocate memory 328192e8d321SLois Curfman McInnes or you will get TERRIBLE performance; see the users' manual chapter on 32826da5968aSLois Curfman McInnes matrices. 
328379bdfe76SSatish Balay 3284027ccd11SLois Curfman McInnes Level: intermediate 3285027ccd11SLois Curfman McInnes 328692e8d321SLois Curfman McInnes .keywords: matrix, block, aij, compressed row, sparse, parallel 328779bdfe76SSatish Balay 328869b1f4b7SBarry Smith .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR() 328979bdfe76SSatish Balay @*/ 329069b1f4b7SBarry Smith PetscErrorCode MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 329179bdfe76SSatish Balay { 32926849ba73SBarry Smith PetscErrorCode ierr; 3293b24ad042SBarry Smith PetscMPIInt size; 329479bdfe76SSatish Balay 3295d64ed03dSBarry Smith PetscFunctionBegin; 3296f69a0ea3SMatthew Knepley ierr = MatCreate(comm,A);CHKERRQ(ierr); 3297f69a0ea3SMatthew Knepley ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3298d132466eSBarry Smith ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3299273d9f13SBarry Smith if (size > 1) { 3300273d9f13SBarry Smith ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr); 3301273d9f13SBarry Smith ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3302273d9f13SBarry Smith } else { 3303273d9f13SBarry Smith ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3304273d9f13SBarry Smith ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr); 33053914022bSBarry Smith } 33063a40ed3dSBarry Smith PetscFunctionReturn(0); 330779bdfe76SSatish Balay } 3308026e39d0SSatish Balay 33094a2ae208SSatish Balay #undef __FUNCT__ 33104a2ae208SSatish Balay #define __FUNCT__ "MatDuplicate_MPIBAIJ" 33116849ba73SBarry Smith static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 33120ac07820SSatish Balay { 33130ac07820SSatish Balay Mat mat; 33140ac07820SSatish Balay Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data; 3315dfbe8321SBarry Smith PetscErrorCode 
ierr; 3316b24ad042SBarry Smith PetscInt len=0; 33170ac07820SSatish Balay 3318d64ed03dSBarry Smith PetscFunctionBegin; 33190ac07820SSatish Balay *newmat = 0; 3320ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 3321d0f46423SBarry Smith ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 33227adad957SLisandro Dalcin ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 33231d5dac46SHong Zhang ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 33247fff6886SHong Zhang 3325d5f3da31SBarry Smith mat->factortype = matin->factortype; 3326273d9f13SBarry Smith mat->preallocated = PETSC_TRUE; 33270ac07820SSatish Balay mat->assembled = PETSC_TRUE; 33287fff6886SHong Zhang mat->insertmode = NOT_SET_VALUES; 33297fff6886SHong Zhang 3330273d9f13SBarry Smith a = (Mat_MPIBAIJ*)mat->data; 3331d0f46423SBarry Smith mat->rmap->bs = matin->rmap->bs; 33320ac07820SSatish Balay a->bs2 = oldmat->bs2; 33330ac07820SSatish Balay a->mbs = oldmat->mbs; 33340ac07820SSatish Balay a->nbs = oldmat->nbs; 33350ac07820SSatish Balay a->Mbs = oldmat->Mbs; 33360ac07820SSatish Balay a->Nbs = oldmat->Nbs; 33370ac07820SSatish Balay 33381e1e43feSBarry Smith ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 33391e1e43feSBarry Smith ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 3340899cda47SBarry Smith 33410ac07820SSatish Balay a->size = oldmat->size; 33420ac07820SSatish Balay a->rank = oldmat->rank; 3343aef5e8e0SSatish Balay a->donotstash = oldmat->donotstash; 3344aef5e8e0SSatish Balay a->roworiented = oldmat->roworiented; 3345aef5e8e0SSatish Balay a->rowindices = 0; 33460ac07820SSatish Balay a->rowvalues = 0; 33470ac07820SSatish Balay a->getrowactive = PETSC_FALSE; 334830793edcSSatish Balay a->barray = 0; 3349899cda47SBarry Smith a->rstartbs = oldmat->rstartbs; 3350899cda47SBarry Smith a->rendbs = oldmat->rendbs; 3351899cda47SBarry Smith 
a->cstartbs = oldmat->cstartbs; 3352899cda47SBarry Smith a->cendbs = oldmat->cendbs; 33530ac07820SSatish Balay 3354133cdb44SSatish Balay /* hash table stuff */ 3355133cdb44SSatish Balay a->ht = 0; 3356133cdb44SSatish Balay a->hd = 0; 3357133cdb44SSatish Balay a->ht_size = 0; 3358133cdb44SSatish Balay a->ht_flag = oldmat->ht_flag; 335925fdafccSSatish Balay a->ht_fact = oldmat->ht_fact; 3360133cdb44SSatish Balay a->ht_total_ct = 0; 3361133cdb44SSatish Balay a->ht_insert_ct = 0; 3362133cdb44SSatish Balay 3363899cda47SBarry Smith ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr); 33640ac07820SSatish Balay if (oldmat->colmap) { 3365aa482453SBarry Smith #if defined(PETSC_USE_CTABLE) 33660f5bd95cSBarry Smith ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 336748e59246SSatish Balay #else 3368854ce69bSBarry Smith ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr); 33693bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 3370b24ad042SBarry Smith ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr); 337148e59246SSatish Balay #endif 33720ac07820SSatish Balay } else a->colmap = 0; 33734beb1cfeSHong Zhang 33740ac07820SSatish Balay if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) { 3375785e854fSJed Brown ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr); 33763bb1ff40SBarry Smith ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 3377b24ad042SBarry Smith ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); 33780ac07820SSatish Balay } else a->garray = 0; 33790ac07820SSatish Balay 3380ce94432eSBarry Smith ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr); 33810ac07820SSatish Balay ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 33823bb1ff40SBarry Smith ierr = 
PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 33830ac07820SSatish Balay ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 33843bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 33857fff6886SHong Zhang 33862e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 33873bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 33882e8a6d31SBarry Smith ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 33893bb1ff40SBarry Smith ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 3390140e18c1SBarry Smith ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 33910ac07820SSatish Balay *newmat = mat; 33923a40ed3dSBarry Smith PetscFunctionReturn(0); 33930ac07820SSatish Balay } 339457b952d6SSatish Balay 33954a2ae208SSatish Balay #undef __FUNCT__ 33965bba2384SShri Abhyankar #define __FUNCT__ "MatLoad_MPIBAIJ" 3397112444f4SShri Abhyankar PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer) 33984683f7a4SShri Abhyankar { 33994683f7a4SShri Abhyankar PetscErrorCode ierr; 34004683f7a4SShri Abhyankar int fd; 34014683f7a4SShri Abhyankar PetscInt i,nz,j,rstart,rend; 34024683f7a4SShri Abhyankar PetscScalar *vals,*buf; 3403ce94432eSBarry Smith MPI_Comm comm; 34044683f7a4SShri Abhyankar MPI_Status status; 34054683f7a4SShri Abhyankar PetscMPIInt rank,size,maxnz; 34064683f7a4SShri Abhyankar PetscInt header[4],*rowlengths = 0,M,N,m,*rowners,*cols; 34070298fd71SBarry Smith PetscInt *locrowlens = NULL,*procsnz = NULL,*browners = NULL; 34083059b6faSBarry Smith PetscInt jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax; 34094683f7a4SShri Abhyankar PetscMPIInt tag = ((PetscObject)viewer)->tag; 34100298fd71SBarry Smith PetscInt *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount; 
34114683f7a4SShri Abhyankar PetscInt dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols; 34124683f7a4SShri Abhyankar 34134683f7a4SShri Abhyankar PetscFunctionBegin; 3414*c98fd787SBarry Smith /* force binary viewer to load .info file if it has not yet done so */ 3415*c98fd787SBarry Smith ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3416ce94432eSBarry Smith ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 34170298fd71SBarry Smith ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr); 34180298fd71SBarry Smith ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 34194683f7a4SShri Abhyankar ierr = PetscOptionsEnd();CHKERRQ(ierr); 34203059b6faSBarry Smith if (bs < 0) bs = 1; 34214683f7a4SShri Abhyankar 34224683f7a4SShri Abhyankar ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 34234683f7a4SShri Abhyankar ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 34244683f7a4SShri Abhyankar ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 34255872f025SBarry Smith if (!rank) { 34264683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 34274683f7a4SShri Abhyankar if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 34284683f7a4SShri Abhyankar } 34294683f7a4SShri Abhyankar 34304683f7a4SShri Abhyankar if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0; 34314683f7a4SShri Abhyankar 34324683f7a4SShri Abhyankar ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 34334683f7a4SShri Abhyankar M = header[1]; N = header[2]; 34344683f7a4SShri Abhyankar 34354683f7a4SShri Abhyankar /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */ 34364683f7a4SShri Abhyankar if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M; 34374683f7a4SShri Abhyankar if (sizesset 
&& newmat->cmap->N < 0) newmat->cmap->N = N; 34384683f7a4SShri Abhyankar 34394683f7a4SShri Abhyankar /* If global sizes are set, check if they are consistent with that given in the file */ 34404683f7a4SShri Abhyankar if (sizesset) { 34414683f7a4SShri Abhyankar ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr); 34424683f7a4SShri Abhyankar } 3443abd38a8fSBarry Smith if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows); 3444abd38a8fSBarry Smith if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols); 34454683f7a4SShri Abhyankar 3446ce94432eSBarry Smith if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices"); 34474683f7a4SShri Abhyankar 34484683f7a4SShri Abhyankar /* 34494683f7a4SShri Abhyankar This code adds extra rows to make sure the number of rows is 34504683f7a4SShri Abhyankar divisible by the blocksize 34514683f7a4SShri Abhyankar */ 34524683f7a4SShri Abhyankar Mbs = M/bs; 34534683f7a4SShri Abhyankar extra_rows = bs - M + bs*Mbs; 34544683f7a4SShri Abhyankar if (extra_rows == bs) extra_rows = 0; 34554683f7a4SShri Abhyankar else Mbs++; 34564683f7a4SShri Abhyankar if (extra_rows && !rank) { 34574683f7a4SShri Abhyankar ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 34584683f7a4SShri Abhyankar } 34594683f7a4SShri Abhyankar 34604683f7a4SShri Abhyankar /* determine ownership of all rows */ 34614683f7a4SShri Abhyankar if (newmat->rmap->n < 0) { /* PETSC_DECIDE */ 34624683f7a4SShri Abhyankar mbs = Mbs/size + ((Mbs % size) > rank); 34634683f7a4SShri Abhyankar m = mbs*bs; 34644683f7a4SShri Abhyankar } else { /* User set */ 34654683f7a4SShri Abhyankar m = newmat->rmap->n; 34664683f7a4SShri Abhyankar mbs = m/bs; 34674683f7a4SShri Abhyankar } 
3468dcca6d9dSJed Brown ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr); 34694683f7a4SShri Abhyankar ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 34704683f7a4SShri Abhyankar 34714683f7a4SShri Abhyankar /* process 0 needs enough room for process with most rows */ 34724683f7a4SShri Abhyankar if (!rank) { 34734683f7a4SShri Abhyankar mmax = rowners[1]; 34741251c579SMatthew G Knepley for (i=2; i<=size; i++) { 34754683f7a4SShri Abhyankar mmax = PetscMax(mmax,rowners[i]); 34764683f7a4SShri Abhyankar } 34774683f7a4SShri Abhyankar mmax*=bs; 34783964eb88SJed Brown } else mmax = -1; /* unused, but compiler warns anyway */ 34794683f7a4SShri Abhyankar 34804683f7a4SShri Abhyankar rowners[0] = 0; 34814683f7a4SShri Abhyankar for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 34824683f7a4SShri Abhyankar for (i=0; i<=size; i++) browners[i] = rowners[i]*bs; 34834683f7a4SShri Abhyankar rstart = rowners[rank]; 34844683f7a4SShri Abhyankar rend = rowners[rank+1]; 34854683f7a4SShri Abhyankar 34864683f7a4SShri Abhyankar /* distribute row lengths to all processors */ 3487785e854fSJed Brown ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr); 34884683f7a4SShri Abhyankar if (!rank) { 34894683f7a4SShri Abhyankar mend = m; 34904683f7a4SShri Abhyankar if (size == 1) mend = mend - extra_rows; 34914683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr); 34924683f7a4SShri Abhyankar for (j=mend; j<m; j++) locrowlens[j] = 1; 3493785e854fSJed Brown ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 34941795a4d1SJed Brown ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 34954683f7a4SShri Abhyankar for (j=0; j<m; j++) { 34964683f7a4SShri Abhyankar procsnz[0] += locrowlens[j]; 34974683f7a4SShri Abhyankar } 34984683f7a4SShri Abhyankar for (i=1; i<size; i++) { 34994683f7a4SShri Abhyankar mend = browners[i+1] - browners[i]; 35004683f7a4SShri Abhyankar if (i == size-1) mend = mend - extra_rows; 35014683f7a4SShri 
Abhyankar ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr); 35024683f7a4SShri Abhyankar for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1; 35034683f7a4SShri Abhyankar /* calculate the number of nonzeros on each processor */ 35044683f7a4SShri Abhyankar for (j=0; j<browners[i+1]-browners[i]; j++) { 35054683f7a4SShri Abhyankar procsnz[i] += rowlengths[j]; 35064683f7a4SShri Abhyankar } 35074683f7a4SShri Abhyankar ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 35084683f7a4SShri Abhyankar } 35094683f7a4SShri Abhyankar ierr = PetscFree(rowlengths);CHKERRQ(ierr); 35104683f7a4SShri Abhyankar } else { 35114683f7a4SShri Abhyankar ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 35124683f7a4SShri Abhyankar } 35134683f7a4SShri Abhyankar 35144683f7a4SShri Abhyankar if (!rank) { 35154683f7a4SShri Abhyankar /* determine max buffer needed and allocate it */ 35164683f7a4SShri Abhyankar maxnz = procsnz[0]; 35174683f7a4SShri Abhyankar for (i=1; i<size; i++) { 35184683f7a4SShri Abhyankar maxnz = PetscMax(maxnz,procsnz[i]); 35194683f7a4SShri Abhyankar } 3520785e854fSJed Brown ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 35214683f7a4SShri Abhyankar 35224683f7a4SShri Abhyankar /* read in my part of the matrix column indices */ 35234683f7a4SShri Abhyankar nz = procsnz[0]; 3524854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 35254683f7a4SShri Abhyankar mycols = ibuf; 35264683f7a4SShri Abhyankar if (size == 1) nz -= extra_rows; 35274683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 352826fbe8dcSKarl Rupp if (size == 1) { 352926fbe8dcSKarl Rupp for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i; 353026fbe8dcSKarl Rupp } 35314683f7a4SShri Abhyankar 35324683f7a4SShri Abhyankar /* read in every ones (except the last) and ship off */ 35334683f7a4SShri Abhyankar for (i=1; i<size-1; i++) { 35344683f7a4SShri Abhyankar nz = procsnz[i]; 
35354683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 35364683f7a4SShri Abhyankar ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 35374683f7a4SShri Abhyankar } 35384683f7a4SShri Abhyankar /* read in the stuff for the last proc */ 35394683f7a4SShri Abhyankar if (size != 1) { 35404683f7a4SShri Abhyankar nz = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */ 35414683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 35424683f7a4SShri Abhyankar for (i=0; i<extra_rows; i++) cols[nz+i] = M+i; 35434683f7a4SShri Abhyankar ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr); 35444683f7a4SShri Abhyankar } 35454683f7a4SShri Abhyankar ierr = PetscFree(cols);CHKERRQ(ierr); 35464683f7a4SShri Abhyankar } else { 35474683f7a4SShri Abhyankar /* determine buffer space needed for message */ 35484683f7a4SShri Abhyankar nz = 0; 35494683f7a4SShri Abhyankar for (i=0; i<m; i++) { 35504683f7a4SShri Abhyankar nz += locrowlens[i]; 35514683f7a4SShri Abhyankar } 3552854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr); 35534683f7a4SShri Abhyankar mycols = ibuf; 35544683f7a4SShri Abhyankar /* receive message of column indices*/ 35554683f7a4SShri Abhyankar ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 35564683f7a4SShri Abhyankar ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr); 35574683f7a4SShri Abhyankar if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file"); 35584683f7a4SShri Abhyankar } 35594683f7a4SShri Abhyankar 35604683f7a4SShri Abhyankar /* loop over local rows, determining number of off diagonal entries */ 3561dcca6d9dSJed Brown ierr = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr); 35621795a4d1SJed Brown ierr = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr); 35634683f7a4SShri Abhyankar rowcount = 0; nzcount = 0; 35644683f7a4SShri Abhyankar for 
(i=0; i<mbs; i++) { 35654683f7a4SShri Abhyankar dcount = 0; 35664683f7a4SShri Abhyankar odcount = 0; 35674683f7a4SShri Abhyankar for (j=0; j<bs; j++) { 35684683f7a4SShri Abhyankar kmax = locrowlens[rowcount]; 35694683f7a4SShri Abhyankar for (k=0; k<kmax; k++) { 35704683f7a4SShri Abhyankar tmp = mycols[nzcount++]/bs; 35714683f7a4SShri Abhyankar if (!mask[tmp]) { 35724683f7a4SShri Abhyankar mask[tmp] = 1; 35734683f7a4SShri Abhyankar if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; 35744683f7a4SShri Abhyankar else masked1[dcount++] = tmp; 35754683f7a4SShri Abhyankar } 35764683f7a4SShri Abhyankar } 35774683f7a4SShri Abhyankar rowcount++; 35784683f7a4SShri Abhyankar } 35794683f7a4SShri Abhyankar 35804683f7a4SShri Abhyankar dlens[i] = dcount; 35814683f7a4SShri Abhyankar odlens[i] = odcount; 35824683f7a4SShri Abhyankar 35834683f7a4SShri Abhyankar /* zero out the mask elements we set */ 35844683f7a4SShri Abhyankar for (j=0; j<dcount; j++) mask[masked1[j]] = 0; 35854683f7a4SShri Abhyankar for (j=0; j<odcount; j++) mask[masked2[j]] = 0; 35864683f7a4SShri Abhyankar } 35874683f7a4SShri Abhyankar 35884683f7a4SShri Abhyankar 35894683f7a4SShri Abhyankar if (!sizesset) { 35904683f7a4SShri Abhyankar ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 35914683f7a4SShri Abhyankar } 35924683f7a4SShri Abhyankar ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr); 35934683f7a4SShri Abhyankar 35944683f7a4SShri Abhyankar if (!rank) { 3595854ce69bSBarry Smith ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr); 35964683f7a4SShri Abhyankar /* read in my part of the matrix numerical values */ 35974683f7a4SShri Abhyankar nz = procsnz[0]; 35984683f7a4SShri Abhyankar vals = buf; 35994683f7a4SShri Abhyankar mycols = ibuf; 36004683f7a4SShri Abhyankar if (size == 1) nz -= extra_rows; 36014683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 360226fbe8dcSKarl Rupp if (size == 1) { 360326fbe8dcSKarl Rupp for (i=0; i< 
extra_rows; i++) vals[nz+i] = 1.0; 360426fbe8dcSKarl Rupp } 36054683f7a4SShri Abhyankar 36064683f7a4SShri Abhyankar /* insert into matrix */ 36074683f7a4SShri Abhyankar jj = rstart*bs; 36084683f7a4SShri Abhyankar for (i=0; i<m; i++) { 36094683f7a4SShri Abhyankar ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 36104683f7a4SShri Abhyankar mycols += locrowlens[i]; 36114683f7a4SShri Abhyankar vals += locrowlens[i]; 36124683f7a4SShri Abhyankar jj++; 36134683f7a4SShri Abhyankar } 36144683f7a4SShri Abhyankar /* read in other processors (except the last one) and ship out */ 36154683f7a4SShri Abhyankar for (i=1; i<size-1; i++) { 36164683f7a4SShri Abhyankar nz = procsnz[i]; 36174683f7a4SShri Abhyankar vals = buf; 36184683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3619479e424cSMichael Lange ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36204683f7a4SShri Abhyankar } 36214683f7a4SShri Abhyankar /* the last proc */ 36224683f7a4SShri Abhyankar if (size != 1) { 36234683f7a4SShri Abhyankar nz = procsnz[i] - extra_rows; 36244683f7a4SShri Abhyankar vals = buf; 36254683f7a4SShri Abhyankar ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 36264683f7a4SShri Abhyankar for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0; 3627479e424cSMichael Lange ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36284683f7a4SShri Abhyankar } 36294683f7a4SShri Abhyankar ierr = PetscFree(procsnz);CHKERRQ(ierr); 36304683f7a4SShri Abhyankar } else { 36314683f7a4SShri Abhyankar /* receive numeric values */ 3632854ce69bSBarry Smith ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr); 36334683f7a4SShri Abhyankar 36344683f7a4SShri Abhyankar /* receive message of values*/ 36354683f7a4SShri Abhyankar vals = buf; 36364683f7a4SShri Abhyankar mycols = ibuf; 3637479e424cSMichael Lange ierr = 
MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr); 36384683f7a4SShri Abhyankar 36394683f7a4SShri Abhyankar /* insert into matrix */ 36404683f7a4SShri Abhyankar jj = rstart*bs; 36414683f7a4SShri Abhyankar for (i=0; i<m; i++) { 36424683f7a4SShri Abhyankar ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr); 36434683f7a4SShri Abhyankar mycols += locrowlens[i]; 36444683f7a4SShri Abhyankar vals += locrowlens[i]; 36454683f7a4SShri Abhyankar jj++; 36464683f7a4SShri Abhyankar } 36474683f7a4SShri Abhyankar } 36484683f7a4SShri Abhyankar ierr = PetscFree(locrowlens);CHKERRQ(ierr); 36494683f7a4SShri Abhyankar ierr = PetscFree(buf);CHKERRQ(ierr); 36504683f7a4SShri Abhyankar ierr = PetscFree(ibuf);CHKERRQ(ierr); 36514683f7a4SShri Abhyankar ierr = PetscFree2(rowners,browners);CHKERRQ(ierr); 36524683f7a4SShri Abhyankar ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr); 36534683f7a4SShri Abhyankar ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr); 36544683f7a4SShri Abhyankar ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 36554683f7a4SShri Abhyankar ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 36564683f7a4SShri Abhyankar PetscFunctionReturn(0); 36574683f7a4SShri Abhyankar } 36584683f7a4SShri Abhyankar 36594683f7a4SShri Abhyankar #undef __FUNCT__ 36604a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJSetHashTableFactor" 3661133cdb44SSatish Balay /*@ 3662133cdb44SSatish Balay MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable. 3663133cdb44SSatish Balay 3664133cdb44SSatish Balay Input Parameters: 3665133cdb44SSatish Balay . mat - the matrix 3666133cdb44SSatish Balay . 
fact - factor 3667133cdb44SSatish Balay 3668c5eb9154SBarry Smith Not Collective, each process can use a different factor 3669fee21e36SBarry Smith 36708c890885SBarry Smith Level: advanced 36718c890885SBarry Smith 3672133cdb44SSatish Balay Notes: 36738c07d4e3SBarry Smith This can also be set by the command line option: -mat_use_hash_table <fact> 3674133cdb44SSatish Balay 3675133cdb44SSatish Balay .keywords: matrix, hashtable, factor, HT 3676133cdb44SSatish Balay 3677133cdb44SSatish Balay .seealso: MatSetOption() 3678133cdb44SSatish Balay @*/ 36797087cfbeSBarry Smith PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact) 3680133cdb44SSatish Balay { 36814ac538c5SBarry Smith PetscErrorCode ierr; 36825bf65638SKris Buschelman 36835bf65638SKris Buschelman PetscFunctionBegin; 36844ac538c5SBarry Smith ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr); 36855bf65638SKris Buschelman PetscFunctionReturn(0); 36865bf65638SKris Buschelman } 36875bf65638SKris Buschelman 36885bf65638SKris Buschelman #undef __FUNCT__ 36895bf65638SKris Buschelman #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ" 36907087cfbeSBarry Smith PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact) 36915bf65638SKris Buschelman { 369225fdafccSSatish Balay Mat_MPIBAIJ *baij; 3693133cdb44SSatish Balay 3694133cdb44SSatish Balay PetscFunctionBegin; 3695133cdb44SSatish Balay baij = (Mat_MPIBAIJ*)mat->data; 3696133cdb44SSatish Balay baij->ht_fact = fact; 3697133cdb44SSatish Balay PetscFunctionReturn(0); 3698133cdb44SSatish Balay } 3699f2a5309cSSatish Balay 37004a2ae208SSatish Balay #undef __FUNCT__ 37014a2ae208SSatish Balay #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ" 37029230625dSJed Brown PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3703f2a5309cSSatish Balay { 3704f2a5309cSSatish Balay Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data; 37055fd66863SKarl Rupp 3706f2a5309cSSatish Balay PetscFunctionBegin; 370721e72a00SBarry 
Smith if (Ad) *Ad = a->A; 370821e72a00SBarry Smith if (Ao) *Ao = a->B; 370921e72a00SBarry Smith if (colmap) *colmap = a->garray; 3710f2a5309cSSatish Balay PetscFunctionReturn(0); 3711f2a5309cSSatish Balay } 371285535b8eSBarry Smith 371385535b8eSBarry Smith /* 371485535b8eSBarry Smith Special version for direct calls from Fortran (to eliminate two function call overheads 371585535b8eSBarry Smith */ 371685535b8eSBarry Smith #if defined(PETSC_HAVE_FORTRAN_CAPS) 371785535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 371885535b8eSBarry Smith #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 371985535b8eSBarry Smith #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 372085535b8eSBarry Smith #endif 372185535b8eSBarry Smith 372285535b8eSBarry Smith #undef __FUNCT__ 372385535b8eSBarry Smith #define __FUNCT__ "matmpibiajsetvaluesblocked" 372485535b8eSBarry Smith /*@C 372585535b8eSBarry Smith MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked() 372685535b8eSBarry Smith 372785535b8eSBarry Smith Collective on Mat 372885535b8eSBarry Smith 372985535b8eSBarry Smith Input Parameters: 373085535b8eSBarry Smith + mat - the matrix 373185535b8eSBarry Smith . min - number of input rows 373285535b8eSBarry Smith . im - input rows 373385535b8eSBarry Smith . nin - number of input columns 373485535b8eSBarry Smith . in - input columns 373585535b8eSBarry Smith . v - numerical values input 373685535b8eSBarry Smith - addvin - INSERT_VALUES or ADD_VALUES 373785535b8eSBarry Smith 373885535b8eSBarry Smith Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse. 
373985535b8eSBarry Smith 374085535b8eSBarry Smith Level: advanced 374185535b8eSBarry Smith 374285535b8eSBarry Smith .seealso: MatSetValuesBlocked() 374385535b8eSBarry Smith @*/ 374485535b8eSBarry Smith PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin) 374585535b8eSBarry Smith { 374685535b8eSBarry Smith /* convert input arguments to C version */ 374785535b8eSBarry Smith Mat mat = *matin; 374885535b8eSBarry Smith PetscInt m = *min, n = *nin; 374985535b8eSBarry Smith InsertMode addv = *addvin; 375085535b8eSBarry Smith 375185535b8eSBarry Smith Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 375285535b8eSBarry Smith const MatScalar *value; 375385535b8eSBarry Smith MatScalar *barray = baij->barray; 3754ace3abfcSBarry Smith PetscBool roworiented = baij->roworiented; 375585535b8eSBarry Smith PetscErrorCode ierr; 375685535b8eSBarry Smith PetscInt i,j,ii,jj,row,col,rstart=baij->rstartbs; 375785535b8eSBarry Smith PetscInt rend=baij->rendbs,cstart=baij->cstartbs,stepval; 3758d0f46423SBarry Smith PetscInt cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2; 375985535b8eSBarry Smith 376085535b8eSBarry Smith PetscFunctionBegin; 376185535b8eSBarry Smith /* tasks normally handled by MatSetValuesBlocked() */ 376226fbe8dcSKarl Rupp if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 376385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3764e7e72b3dSBarry Smith else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 3765e32f2f54SBarry Smith if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 376685535b8eSBarry Smith #endif 376785535b8eSBarry Smith if (mat->assembled) { 376885535b8eSBarry Smith mat->was_assembled = PETSC_TRUE; 376985535b8eSBarry Smith mat->assembled = PETSC_FALSE; 377085535b8eSBarry Smith } 377185535b8eSBarry Smith ierr = 
PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 377285535b8eSBarry Smith 377385535b8eSBarry Smith 377485535b8eSBarry Smith if (!barray) { 3775785e854fSJed Brown ierr = PetscMalloc1(bs2,&barray);CHKERRQ(ierr); 377685535b8eSBarry Smith baij->barray = barray; 377785535b8eSBarry Smith } 377885535b8eSBarry Smith 377926fbe8dcSKarl Rupp if (roworiented) stepval = (n-1)*bs; 378026fbe8dcSKarl Rupp else stepval = (m-1)*bs; 378126fbe8dcSKarl Rupp 378285535b8eSBarry Smith for (i=0; i<m; i++) { 378385535b8eSBarry Smith if (im[i] < 0) continue; 378485535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3785e32f2f54SBarry Smith if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1); 378685535b8eSBarry Smith #endif 378785535b8eSBarry Smith if (im[i] >= rstart && im[i] < rend) { 378885535b8eSBarry Smith row = im[i] - rstart; 378985535b8eSBarry Smith for (j=0; j<n; j++) { 379085535b8eSBarry Smith /* If NumCol = 1 then a copy is not required */ 379185535b8eSBarry Smith if ((roworiented) && (n == 1)) { 379285535b8eSBarry Smith barray = (MatScalar*)v + i*bs2; 379385535b8eSBarry Smith } else if ((!roworiented) && (m == 1)) { 379485535b8eSBarry Smith barray = (MatScalar*)v + j*bs2; 379585535b8eSBarry Smith } else { /* Here a copy is required */ 379685535b8eSBarry Smith if (roworiented) { 379785535b8eSBarry Smith value = v + i*(stepval+bs)*bs + j*bs; 379885535b8eSBarry Smith } else { 379985535b8eSBarry Smith value = v + j*(stepval+bs)*bs + i*bs; 380085535b8eSBarry Smith } 380185535b8eSBarry Smith for (ii=0; ii<bs; ii++,value+=stepval) { 380285535b8eSBarry Smith for (jj=0; jj<bs; jj++) { 380385535b8eSBarry Smith *barray++ = *value++; 380485535b8eSBarry Smith } 380585535b8eSBarry Smith } 380685535b8eSBarry Smith barray -=bs2; 380785535b8eSBarry Smith } 380885535b8eSBarry Smith 380985535b8eSBarry Smith if (in[j] >= cstart && in[j] < cend) { 381085535b8eSBarry Smith col = in[j] - cstart; 381197e5c40aSBarry Smith ierr = 
MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 381226fbe8dcSKarl Rupp } else if (in[j] < 0) continue; 381385535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 3814cb9801acSJed Brown else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1); 381585535b8eSBarry Smith #endif 381685535b8eSBarry Smith else { 381785535b8eSBarry Smith if (mat->was_assembled) { 381885535b8eSBarry Smith if (!baij->colmap) { 3819ab9863d7SBarry Smith ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr); 382085535b8eSBarry Smith } 382185535b8eSBarry Smith 382285535b8eSBarry Smith #if defined(PETSC_USE_DEBUG) 382385535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 382485535b8eSBarry Smith { PetscInt data; 382585535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr); 3826e32f2f54SBarry Smith if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 382785535b8eSBarry Smith } 382885535b8eSBarry Smith #else 3829e32f2f54SBarry Smith if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap"); 383085535b8eSBarry Smith #endif 383185535b8eSBarry Smith #endif 383285535b8eSBarry Smith #if defined(PETSC_USE_CTABLE) 383385535b8eSBarry Smith ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr); 383485535b8eSBarry Smith col = (col - 1)/bs; 383585535b8eSBarry Smith #else 383685535b8eSBarry Smith col = (baij->colmap[in[j]] - 1)/bs; 383785535b8eSBarry Smith #endif 383885535b8eSBarry Smith if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) { 3839ab9863d7SBarry Smith ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr); 384085535b8eSBarry Smith col = in[j]; 384185535b8eSBarry Smith } 384226fbe8dcSKarl Rupp } else col = in[j]; 384397e5c40aSBarry Smith ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr); 384485535b8eSBarry Smith } 384585535b8eSBarry Smith } 384685535b8eSBarry Smith } 
else { 384785535b8eSBarry Smith if (!baij->donotstash) { 384885535b8eSBarry Smith if (roworiented) { 384985535b8eSBarry Smith ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 385085535b8eSBarry Smith } else { 385185535b8eSBarry Smith ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr); 385285535b8eSBarry Smith } 385385535b8eSBarry Smith } 385485535b8eSBarry Smith } 385585535b8eSBarry Smith } 385685535b8eSBarry Smith 385785535b8eSBarry Smith /* task normally handled by MatSetValuesBlocked() */ 385885535b8eSBarry Smith ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr); 385985535b8eSBarry Smith PetscFunctionReturn(0); 386085535b8eSBarry Smith } 3861dfb205c3SBarry Smith 3862dfb205c3SBarry Smith #undef __FUNCT__ 3863dfb205c3SBarry Smith #define __FUNCT__ "MatCreateMPIBAIJWithArrays" 3864dfb205c3SBarry Smith /*@ 3865dfb205c3SBarry Smith MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard 3866dfb205c3SBarry Smith CSR format the local rows. 3867dfb205c3SBarry Smith 3868dfb205c3SBarry Smith Collective on MPI_Comm 3869dfb205c3SBarry Smith 3870dfb205c3SBarry Smith Input Parameters: 3871dfb205c3SBarry Smith + comm - MPI communicator 3872dfb205c3SBarry Smith . bs - the block size, only a block size of 1 is supported 3873dfb205c3SBarry Smith . m - number of local rows (Cannot be PETSC_DECIDE) 3874dfb205c3SBarry Smith . n - This value should be the same as the local size used in creating the 3875dfb205c3SBarry Smith x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3876dfb205c3SBarry Smith calculated if N is given) For square matrices n is almost always m. 3877dfb205c3SBarry Smith . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3878dfb205c3SBarry Smith . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3879dfb205c3SBarry Smith . 
i - row indices 3880dfb205c3SBarry Smith . j - column indices 3881dfb205c3SBarry Smith - a - matrix values 3882dfb205c3SBarry Smith 3883dfb205c3SBarry Smith Output Parameter: 3884dfb205c3SBarry Smith . mat - the matrix 3885dfb205c3SBarry Smith 3886dfb205c3SBarry Smith Level: intermediate 3887dfb205c3SBarry Smith 3888dfb205c3SBarry Smith Notes: 3889dfb205c3SBarry Smith The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3890dfb205c3SBarry Smith thus you CANNOT change the matrix entries by changing the values of a[] after you have 3891dfb205c3SBarry Smith called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3892dfb205c3SBarry Smith 38933adadaf3SJed Brown The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 38943adadaf3SJed Brown the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 38953adadaf3SJed Brown block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 38963adadaf3SJed Brown with column-major ordering within blocks. 38973adadaf3SJed Brown 3898dfb205c3SBarry Smith The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
3899dfb205c3SBarry Smith 3900dfb205c3SBarry Smith .keywords: matrix, aij, compressed row, sparse, parallel 3901dfb205c3SBarry Smith 3902dfb205c3SBarry Smith .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 390369b1f4b7SBarry Smith MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3904dfb205c3SBarry Smith @*/ 39057087cfbeSBarry Smith PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3906dfb205c3SBarry Smith { 3907dfb205c3SBarry Smith PetscErrorCode ierr; 3908dfb205c3SBarry Smith 3909dfb205c3SBarry Smith PetscFunctionBegin; 3910f23aa3ddSBarry Smith if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3911dfb205c3SBarry Smith if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3912dfb205c3SBarry Smith ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3913dfb205c3SBarry Smith ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3914dfb205c3SBarry Smith ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr); 3915d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 3916dfb205c3SBarry Smith ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr); 3917d47bf9aaSJed Brown ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 3918dfb205c3SBarry Smith PetscFunctionReturn(0); 3919dfb205c3SBarry Smith } 3920e561ad89SHong Zhang 3921e561ad89SHong Zhang #undef __FUNCT__ 3922bd153df0SHong Zhang #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ" 3923bd153df0SHong Zhang PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3924e561ad89SHong Zhang { 3925e561ad89SHong Zhang PetscErrorCode ierr; 3926bd153df0SHong Zhang PetscInt 
m,N,i,rstart,nnz,Ii,bs,cbs; 3927bd153df0SHong Zhang PetscInt *indx; 3928bd153df0SHong Zhang PetscScalar *values; 3929e561ad89SHong Zhang 3930e561ad89SHong Zhang PetscFunctionBegin; 3931e561ad89SHong Zhang ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3932bd153df0SHong Zhang if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3933bd153df0SHong Zhang Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inmat->data; 3934bd153df0SHong Zhang PetscInt *dnz,*onz,sum,mbs,Nbs; 3935bd153df0SHong Zhang PetscInt *bindx,rmax=a->rmax,j; 3936e561ad89SHong Zhang 3937bd153df0SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3938bd153df0SHong Zhang mbs = m/bs; Nbs = N/cbs; 3939bd153df0SHong Zhang if (n == PETSC_DECIDE) { 3940bd153df0SHong Zhang ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr); 3941bd153df0SHong Zhang } 3942bd153df0SHong Zhang /* Check sum(n) = Nbs */ 3943bd153df0SHong Zhang ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3944bd153df0SHong Zhang if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs); 3945bd153df0SHong Zhang 3946bd153df0SHong Zhang ierr = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3947bd153df0SHong Zhang rstart -= mbs; 3948e561ad89SHong Zhang 3949647a6520SHong Zhang ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr); 3950bd153df0SHong Zhang ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr); 3951bd153df0SHong Zhang for (i=0; i<mbs; i++) { 3952647a6520SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */ 3953647a6520SHong Zhang nnz = nnz/bs; 3954647a6520SHong Zhang for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs; 3955647a6520SHong Zhang ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr); 3956647a6520SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); 3957e561ad89SHong Zhang } 3958647a6520SHong Zhang ierr = PetscFree(bindx);CHKERRQ(ierr); 
3959e561ad89SHong Zhang 3960e561ad89SHong Zhang ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3961bd153df0SHong Zhang ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3962e561ad89SHong Zhang ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3963e561ad89SHong Zhang ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr); 3964e561ad89SHong Zhang ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr); 3965e561ad89SHong Zhang ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3966e561ad89SHong Zhang } 3967e561ad89SHong Zhang 3968bd153df0SHong Zhang /* numeric phase */ 3969647a6520SHong Zhang ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3970bd153df0SHong Zhang ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3971e561ad89SHong Zhang 3972e561ad89SHong Zhang for (i=0; i<m; i++) { 3973e561ad89SHong Zhang ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3974e561ad89SHong Zhang Ii = i + rstart; 3975bd153df0SHong Zhang ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3976e561ad89SHong Zhang ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3977e561ad89SHong Zhang } 3978bd153df0SHong Zhang ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3979bd153df0SHong Zhang ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3980e561ad89SHong Zhang PetscFunctionReturn(0); 3981e561ad89SHong Zhang } 3982