xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision f69a0ea3504314d029ee423e0de2950ece2dab86)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
38016bdd1SSatish Balay /*
4d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
58016bdd1SSatish Balay */
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
7bba1ac68SSatish Balay 
8b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
98016bdd1SSatish Balay 
104a2ae208SSatish Balay #undef __FUNCT__
114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
12dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
138016bdd1SSatish Balay {
14d9653453SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
15d9653453SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
166849ba73SBarry Smith   PetscErrorCode ierr;
17b24ad042SBarry Smith   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
18521d7252SBarry Smith   PetscInt       bs = mat->bs,*stmp;
198016bdd1SSatish Balay   IS             from,to;
208016bdd1SSatish Balay   Vec            gvec;
21aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
220f5bd95cSBarry Smith   PetscTable     gid1_lid1;
230f5bd95cSBarry Smith   PetscTablePosition tpos;
24b24ad042SBarry Smith   PetscInt       gid,lid;
256f531f54SSatish Balay #else
26b24ad042SBarry Smith   PetscInt       Nbs = baij->Nbs,*indices;
2773a2e727SSatish Balay #endif
288016bdd1SSatish Balay 
293a40ed3dSBarry Smith   PetscFunctionBegin;
3073a2e727SSatish Balay 
31aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3273a2e727SSatish Balay   /* use a table - Mark Adams */
33273d9f13SBarry Smith   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
3473a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
3573a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
36b24ad042SBarry Smith       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
370f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
38fa46199cSSatish Balay       if (!data) {
3973a2e727SSatish Balay         /* one based table */
400f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
4173a2e727SSatish Balay       }
4273a2e727SSatish Balay     }
4373a2e727SSatish Balay   }
4473a2e727SSatish Balay   /* form array of columns we need */
45b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
460f5bd95cSBarry Smith   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4773a2e727SSatish Balay   while (tpos) {
480f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4973a2e727SSatish Balay     gid--; lid--;
5073a2e727SSatish Balay     garray[lid] = gid;
5173a2e727SSatish Balay   }
520064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
530f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5473a2e727SSatish Balay   for (i=0; i<ec; i++) {
550f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5673a2e727SSatish Balay   }
5773a2e727SSatish Balay   /* compact out the extra columns in B */
5873a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
5973a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
60b24ad042SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
610f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
62fa46199cSSatish Balay       lid --;
6373a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6473a2e727SSatish Balay     }
6573a2e727SSatish Balay   }
6673a2e727SSatish Balay   B->nbs     = ec;
67521d7252SBarry Smith   baij->B->n = ec*mat->bs;
680f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6973a2e727SSatish Balay   /* Mark Adams */
7073a2e727SSatish Balay #else
71435da068SBarry Smith   /* Make an array as long as the number of columns */
72d9653453SSatish Balay   /* mark those columns that are in baij->B */
73b24ad042SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
74b24ad042SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
75d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
768016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
77d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
78d9653453SSatish Balay       indices[aj[B->i[i] + j]] = 1;
798016bdd1SSatish Balay     }
808016bdd1SSatish Balay   }
818016bdd1SSatish Balay 
828016bdd1SSatish Balay   /* form array of columns we need */
83b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
848016bdd1SSatish Balay   ec = 0;
85d9653453SSatish Balay   for (i=0; i<Nbs; i++) {
860bdbc534SSatish Balay     if (indices[i]) {
870bdbc534SSatish Balay       garray[ec++] = i;
880bdbc534SSatish Balay     }
898016bdd1SSatish Balay   }
908016bdd1SSatish Balay 
918016bdd1SSatish Balay   /* make indices now point into garray */
928016bdd1SSatish Balay   for (i=0; i<ec; i++) {
93d9653453SSatish Balay     indices[garray[i]] = i;
948016bdd1SSatish Balay   }
958016bdd1SSatish Balay 
968016bdd1SSatish Balay   /* compact out the extra columns in B */
97d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
988016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
99d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
1008016bdd1SSatish Balay     }
1018016bdd1SSatish Balay   }
102d9653453SSatish Balay   B->nbs       = ec;
103521d7252SBarry Smith   baij->B->n   = ec*mat->bs;
104606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10573a2e727SSatish Balay #endif
1068016bdd1SSatish Balay 
1078016bdd1SSatish Balay   /* create local vector that is used to scatter into */
108029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1098016bdd1SSatish Balay 
110c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
1113d3cf644SBarry Smith   for (i=0; i<ec; i++) {
112c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
113c16cb8f2SBarry Smith   }
114029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
1153d3cf644SBarry Smith   for (i=0; i<ec; i++) {
116c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
117c16cb8f2SBarry Smith   }
118c16cb8f2SBarry Smith 
119b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
120537820f0SBarry Smith   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
121029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
122606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1238016bdd1SSatish Balay 
1248016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1258016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1268016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1278016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
128273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
1298016bdd1SSatish Balay 
1308016bdd1SSatish Balay   /* gnerate the scatter context */
131d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13290f02eecSBarry Smith 
13390f02eecSBarry Smith   /*
13490f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
13590f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
13690f02eecSBarry Smith     the matrix.
13790f02eecSBarry Smith   */
13843a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
139ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
14090f02eecSBarry Smith 
14152e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
14252e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
14352e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
14452e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
145d9653453SSatish Balay   baij->garray = garray;
14652e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
1478016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1488016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
149888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
1503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1518016bdd1SSatish Balay }
1528016bdd1SSatish Balay 
1538016bdd1SSatish Balay /*
154d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1558016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1568016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1578016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1588016bdd1SSatish Balay 
1598016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1608016bdd1SSatish Balay    they are sloppy.
1618016bdd1SSatish Balay */
1624a2ae208SSatish Balay #undef __FUNCT__
1634a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPIBAIJ"
164dfbe8321SBarry Smith PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
1658016bdd1SSatish Balay {
166d9653453SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
167d9653453SSatish Balay   Mat            B = baij->B,Bnew;
168d9653453SSatish Balay   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
169dfbe8321SBarry Smith   PetscErrorCode ierr;
170b24ad042SBarry Smith   PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
171b24ad042SBarry Smith   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->m;
1723eda8832SBarry Smith   MatScalar      *a = Bbaij->a;
17387828ca2SBarry Smith   PetscScalar    *atmp;
17476117effSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
175b24ad042SBarry Smith   PetscInt       k;
17676117effSSatish Balay #endif
1778016bdd1SSatish Balay 
1783a40ed3dSBarry Smith   PetscFunctionBegin;
1798016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
180b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
181d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
182d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
183d9653453SSatish Balay   if (baij->colmap) {
184aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
1850f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
18648e59246SSatish Balay #else
187606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
188606d414cSSatish Balay     baij->colmap = 0;
18952e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
19048e59246SSatish Balay #endif
1918016bdd1SSatish Balay   }
1928016bdd1SSatish Balay 
1938016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1946d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953eda8832SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968016bdd1SSatish Balay 
1978016bdd1SSatish Balay   /* invent new B and copy stuff over */
198b24ad042SBarry Smith   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
199d9653453SSatish Balay   for (i=0; i<mbs; i++) {
200d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2018016bdd1SSatish Balay   }
202*f69a0ea3SMatthew Knepley   ierr = MatCreate(B->comm,&Bnew);CHKERRQ(ierr);
203*f69a0ea3SMatthew Knepley   ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr);
204f204ca49SKris Buschelman   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
205521d7252SBarry Smith   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
206bba1ac68SSatish Balay   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
207d9653453SSatish Balay 
2085010ffefSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
209bba1ac68SSatish Balay   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
210bba1ac68SSatish Balay #endif
211bba1ac68SSatish Balay     for (i=0; i<mbs; i++) {
212bba1ac68SSatish Balay       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
213bba1ac68SSatish Balay         col  = garray[Bbaij->j[j]];
214bba1ac68SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
215bba1ac68SSatish Balay         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
2163eda8832SBarry Smith #else
2173eda8832SBarry Smith         atmp = a + j*bs2;
2183eda8832SBarry Smith #endif
219bba1ac68SSatish Balay         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
2208016bdd1SSatish Balay       }
2218016bdd1SSatish Balay     }
222bba1ac68SSatish Balay   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
223bba1ac68SSatish Balay 
2243eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2253eda8832SBarry Smith   ierr = PetscFree(atmp);CHKERRQ(ierr);
2263eda8832SBarry Smith #endif
227bba1ac68SSatish Balay 
228bba1ac68SSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
229606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
230606d414cSSatish Balay   baij->garray = 0;
23152e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
2328016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
23352e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
234d9653453SSatish Balay   baij->B = Bnew;
2358016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2363a40ed3dSBarry Smith   PetscFunctionReturn(0);
2378016bdd1SSatish Balay }
2388016bdd1SSatish Balay 
23904f1ad80SBarry Smith /*      ugly stuff added for Glenn someday we should fix this up */
24004f1ad80SBarry Smith 
241b24ad042SBarry Smith static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
24204f1ad80SBarry Smith                                       parts of the local matrix */
2432cd6534aSBarry Smith static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
24404f1ad80SBarry Smith 
24504f1ad80SBarry Smith 
246348d1a9dSSatish Balay #undef __FUNCT__
247348d1a9dSSatish Balay #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
248dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
24904f1ad80SBarry Smith {
25004f1ad80SBarry Smith   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
25104f1ad80SBarry Smith   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
252dfbe8321SBarry Smith   PetscErrorCode ierr;
253521d7252SBarry Smith   PetscInt       bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
254b24ad042SBarry Smith   PetscInt       *r_rmapd,*r_rmapo;
25504f1ad80SBarry Smith 
25604f1ad80SBarry Smith   PetscFunctionBegin;
25704f1ad80SBarry Smith   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
25804f1ad80SBarry Smith   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
259b24ad042SBarry Smith   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
260b24ad042SBarry Smith   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
26104f1ad80SBarry Smith   nt   = 0;
26204f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
26304f1ad80SBarry Smith     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
26404f1ad80SBarry Smith       nt++;
26504f1ad80SBarry Smith       r_rmapd[i] = inA->bmapping->indices[i] + 1;
26604f1ad80SBarry Smith     }
26704f1ad80SBarry Smith   }
26877431f27SBarry Smith   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
269b24ad042SBarry Smith   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
27004f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
27104f1ad80SBarry Smith     if (r_rmapd[i]){
27204f1ad80SBarry Smith       for (j=0; j<bs; j++) {
27304f1ad80SBarry Smith         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
27404f1ad80SBarry Smith       }
27504f1ad80SBarry Smith     }
27604f1ad80SBarry Smith   }
27704f1ad80SBarry Smith   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
27804f1ad80SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
27904f1ad80SBarry Smith 
280b24ad042SBarry Smith   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
281b24ad042SBarry Smith   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
28204f1ad80SBarry Smith   for (i=0; i<B->nbs; i++) {
28304f1ad80SBarry Smith     lindices[garray[i]] = i+1;
28404f1ad80SBarry Smith   }
28504f1ad80SBarry Smith   no   = inA->bmapping->n - nt;
286b24ad042SBarry Smith   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
287b24ad042SBarry Smith   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
28804f1ad80SBarry Smith   nt   = 0;
28904f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
29004f1ad80SBarry Smith     if (lindices[inA->bmapping->indices[i]]) {
29104f1ad80SBarry Smith       nt++;
29204f1ad80SBarry Smith       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
29304f1ad80SBarry Smith     }
29404f1ad80SBarry Smith   }
29577431f27SBarry Smith   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
29604f1ad80SBarry Smith   ierr = PetscFree(lindices);CHKERRQ(ierr);
297b24ad042SBarry Smith   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
29804f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
29904f1ad80SBarry Smith     if (r_rmapo[i]){
30004f1ad80SBarry Smith       for (j=0; j<bs; j++) {
30104f1ad80SBarry Smith         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
30204f1ad80SBarry Smith       }
30304f1ad80SBarry Smith     }
30404f1ad80SBarry Smith   }
30504f1ad80SBarry Smith   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
30604f1ad80SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
30704f1ad80SBarry Smith 
30804f1ad80SBarry Smith   PetscFunctionReturn(0);
30904f1ad80SBarry Smith }
31004f1ad80SBarry Smith 
311348d1a9dSSatish Balay #undef __FUNCT__
312348d1a9dSSatish Balay #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
313be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
31404f1ad80SBarry Smith {
31592b32695SKris Buschelman   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
316dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,Vec);
31792b32695SKris Buschelman 
31892b32695SKris Buschelman   PetscFunctionBegin;
31992b32695SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
32092b32695SKris Buschelman   if (f) {
32192b32695SKris Buschelman     ierr = (*f)(A,scale);CHKERRQ(ierr);
32292b32695SKris Buschelman   }
32392b32695SKris Buschelman   PetscFunctionReturn(0);
32492b32695SKris Buschelman }
32592b32695SKris Buschelman 
32692b32695SKris Buschelman EXTERN_C_BEGIN
32792b32695SKris Buschelman #undef __FUNCT__
32892b32695SKris Buschelman #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
329be1d678aSKris Buschelman PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
33092b32695SKris Buschelman {
33104f1ad80SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
332dfbe8321SBarry Smith   PetscErrorCode ierr;
333b24ad042SBarry Smith   PetscInt       n,i;
33404f1ad80SBarry Smith   PetscScalar    *d,*o,*s;
33504f1ad80SBarry Smith 
33604f1ad80SBarry Smith   PetscFunctionBegin;
3372cd6534aSBarry Smith   if (!uglyrmapd) {
3382cd6534aSBarry Smith     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
3392cd6534aSBarry Smith   }
3402cd6534aSBarry Smith 
3411ebc52fbSHong Zhang   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
34204f1ad80SBarry Smith 
34304f1ad80SBarry Smith   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
3441ebc52fbSHong Zhang   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
34504f1ad80SBarry Smith   for (i=0; i<n; i++) {
34604f1ad80SBarry Smith     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
34704f1ad80SBarry Smith   }
3481ebc52fbSHong Zhang   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
34904f1ad80SBarry Smith   /* column scale "diagonal" portion of local matrix */
35004f1ad80SBarry Smith   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
35104f1ad80SBarry Smith 
35204f1ad80SBarry Smith   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
3531ebc52fbSHong Zhang   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
35404f1ad80SBarry Smith   for (i=0; i<n; i++) {
35504f1ad80SBarry Smith     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
35604f1ad80SBarry Smith   }
3571ebc52fbSHong Zhang   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
3581ebc52fbSHong Zhang   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
35904f1ad80SBarry Smith   /* column scale "off-diagonal" portion of local matrix */
36004f1ad80SBarry Smith   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
36104f1ad80SBarry Smith 
36204f1ad80SBarry Smith   PetscFunctionReturn(0);
36304f1ad80SBarry Smith }
36492b32695SKris Buschelman EXTERN_C_END
3658016bdd1SSatish Balay 
3663eda8832SBarry Smith 
367