xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision 52e6d16ba989af9362d0fcebb12e73dd7c9666ed)
18016bdd1SSatish Balay /*
2d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
38016bdd1SSatish Balay */
470f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
5bba1ac68SSatish Balay 
6b24ad042SBarry Smith EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
78016bdd1SSatish Balay 
84a2ae208SSatish Balay #undef __FUNCT__
94a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPIBAIJ"
10dfbe8321SBarry Smith PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
118016bdd1SSatish Balay {
12d9653453SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
13d9653453SSatish Balay   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)(baij->B->data);
146849ba73SBarry Smith   PetscErrorCode ierr;
15b24ad042SBarry Smith   PetscInt       i,j,*aj = B->j,ec = 0,*garray;
16521d7252SBarry Smith   PetscInt       bs = mat->bs,*stmp;
178016bdd1SSatish Balay   IS             from,to;
188016bdd1SSatish Balay   Vec            gvec;
19aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
200f5bd95cSBarry Smith   PetscTable     gid1_lid1;
210f5bd95cSBarry Smith   PetscTablePosition tpos;
22b24ad042SBarry Smith   PetscInt       gid,lid;
236f531f54SSatish Balay #else
24b24ad042SBarry Smith   PetscInt       Nbs = baij->Nbs,*indices;
2573a2e727SSatish Balay #endif
268016bdd1SSatish Balay 
273a40ed3dSBarry Smith   PetscFunctionBegin;
2873a2e727SSatish Balay 
29aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
3073a2e727SSatish Balay   /* use a table - Mark Adams */
31273d9f13SBarry Smith   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
3273a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
3373a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
34b24ad042SBarry Smith       PetscInt data,gid1 = aj[B->i[i]+j] + 1;
350f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
36fa46199cSSatish Balay       if (!data) {
3773a2e727SSatish Balay         /* one based table */
380f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
3973a2e727SSatish Balay       }
4073a2e727SSatish Balay     }
4173a2e727SSatish Balay   }
4273a2e727SSatish Balay   /* form array of columns we need */
43b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
440f5bd95cSBarry Smith   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4573a2e727SSatish Balay   while (tpos) {
460f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4773a2e727SSatish Balay     gid--; lid--;
4873a2e727SSatish Balay     garray[lid] = gid;
4973a2e727SSatish Balay   }
500064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
510f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5273a2e727SSatish Balay   for (i=0; i<ec; i++) {
530f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5473a2e727SSatish Balay   }
5573a2e727SSatish Balay   /* compact out the extra columns in B */
5673a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
5773a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
58b24ad042SBarry Smith       PetscInt gid1 = aj[B->i[i] + j] + 1;
590f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60fa46199cSSatish Balay       lid --;
6173a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6273a2e727SSatish Balay     }
6373a2e727SSatish Balay   }
6473a2e727SSatish Balay   B->nbs     = ec;
65521d7252SBarry Smith   baij->B->n = ec*mat->bs;
660f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6773a2e727SSatish Balay   /* Mark Adams */
6873a2e727SSatish Balay #else
69435da068SBarry Smith   /* Make an array as long as the number of columns */
70d9653453SSatish Balay   /* mark those columns that are in baij->B */
71b24ad042SBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
72b24ad042SBarry Smith   ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
73d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
748016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
75d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
76d9653453SSatish Balay       indices[aj[B->i[i] + j]] = 1;
778016bdd1SSatish Balay     }
788016bdd1SSatish Balay   }
798016bdd1SSatish Balay 
808016bdd1SSatish Balay   /* form array of columns we need */
81b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
828016bdd1SSatish Balay   ec = 0;
83d9653453SSatish Balay   for (i=0; i<Nbs; i++) {
840bdbc534SSatish Balay     if (indices[i]) {
850bdbc534SSatish Balay       garray[ec++] = i;
860bdbc534SSatish Balay     }
878016bdd1SSatish Balay   }
888016bdd1SSatish Balay 
898016bdd1SSatish Balay   /* make indices now point into garray */
908016bdd1SSatish Balay   for (i=0; i<ec; i++) {
91d9653453SSatish Balay     indices[garray[i]] = i;
928016bdd1SSatish Balay   }
938016bdd1SSatish Balay 
948016bdd1SSatish Balay   /* compact out the extra columns in B */
95d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
968016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
97d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
988016bdd1SSatish Balay     }
998016bdd1SSatish Balay   }
100d9653453SSatish Balay   B->nbs       = ec;
101521d7252SBarry Smith   baij->B->n   = ec*mat->bs;
102606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10373a2e727SSatish Balay #endif
1048016bdd1SSatish Balay 
1058016bdd1SSatish Balay   /* create local vector that is used to scatter into */
106029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1078016bdd1SSatish Balay 
108c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
1093d3cf644SBarry Smith   for (i=0; i<ec; i++) {
110c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
111c16cb8f2SBarry Smith   }
112029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
1133d3cf644SBarry Smith   for (i=0; i<ec; i++) {
114c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
115c16cb8f2SBarry Smith   }
116c16cb8f2SBarry Smith 
117b24ad042SBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
118537820f0SBarry Smith   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
119029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
120606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1218016bdd1SSatish Balay 
1228016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1238016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1248016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1258016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
126273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
1278016bdd1SSatish Balay 
1288016bdd1SSatish Balay   /* gnerate the scatter context */
129d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13090f02eecSBarry Smith 
13190f02eecSBarry Smith   /*
13290f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
13390f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
13490f02eecSBarry Smith     the matrix.
13590f02eecSBarry Smith   */
13643a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
137ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
13890f02eecSBarry Smith 
139*52e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
140*52e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
141*52e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
142*52e6d16bSBarry Smith   ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
143d9653453SSatish Balay   baij->garray = garray;
144*52e6d16bSBarry Smith   ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
1458016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1468016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
147888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
1483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1498016bdd1SSatish Balay }
1508016bdd1SSatish Balay 
1518016bdd1SSatish Balay /*
152d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1538016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1548016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1558016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1568016bdd1SSatish Balay 
1578016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1588016bdd1SSatish Balay    they are sloppy.
1598016bdd1SSatish Balay */
1604a2ae208SSatish Balay #undef __FUNCT__
1614a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPIBAIJ"
162dfbe8321SBarry Smith PetscErrorCode DisAssemble_MPIBAIJ(Mat A)
1638016bdd1SSatish Balay {
164d9653453SSatish Balay   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
165d9653453SSatish Balay   Mat            B = baij->B,Bnew;
166d9653453SSatish Balay   Mat_SeqBAIJ    *Bbaij = (Mat_SeqBAIJ*)B->data;
167dfbe8321SBarry Smith   PetscErrorCode ierr;
168b24ad042SBarry Smith   PetscInt       i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
169b24ad042SBarry Smith   PetscInt       bs2 = baij->bs2,*nz,ec,m = A->m;
1703eda8832SBarry Smith   MatScalar      *a = Bbaij->a;
17187828ca2SBarry Smith   PetscScalar    *atmp;
17276117effSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
173b24ad042SBarry Smith   PetscInt       k;
17476117effSSatish Balay #endif
1758016bdd1SSatish Balay 
1763a40ed3dSBarry Smith   PetscFunctionBegin;
1778016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
178b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
179d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
180d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
181d9653453SSatish Balay   if (baij->colmap) {
182aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
1830f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
18448e59246SSatish Balay #else
185606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
186606d414cSSatish Balay     baij->colmap = 0;
187*52e6d16bSBarry Smith     ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr);
18848e59246SSatish Balay #endif
1898016bdd1SSatish Balay   }
1908016bdd1SSatish Balay 
1918016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1926d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1933eda8832SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1948016bdd1SSatish Balay 
1958016bdd1SSatish Balay   /* invent new B and copy stuff over */
196b24ad042SBarry Smith   ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr);
197d9653453SSatish Balay   for (i=0; i<mbs; i++) {
198d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
1998016bdd1SSatish Balay   }
200f204ca49SKris Buschelman   ierr = MatCreate(B->comm,m,n,m,n,&Bnew);CHKERRQ(ierr);
201f204ca49SKris Buschelman   ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr);
202521d7252SBarry Smith   ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr);
203bba1ac68SSatish Balay   ierr = MatSetOption(Bnew,MAT_COLUMN_ORIENTED);CHKERRQ(ierr);
204d9653453SSatish Balay 
2055010ffefSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
206bba1ac68SSatish Balay   ierr = PetscMalloc(bs2*sizeof(PetscScalar),&atmp);CHKERRQ(ierr);
207bba1ac68SSatish Balay #endif
208bba1ac68SSatish Balay     for (i=0; i<mbs; i++) {
209bba1ac68SSatish Balay       for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
210bba1ac68SSatish Balay         col  = garray[Bbaij->j[j]];
211bba1ac68SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
212bba1ac68SSatish Balay         for (k=0; k<bs2; k++) atmp[k] = a[j*bs2+k];
2133eda8832SBarry Smith #else
2143eda8832SBarry Smith         atmp = a + j*bs2;
2153eda8832SBarry Smith #endif
216bba1ac68SSatish Balay         ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
2178016bdd1SSatish Balay       }
2188016bdd1SSatish Balay     }
219bba1ac68SSatish Balay   ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED);CHKERRQ(ierr);
220bba1ac68SSatish Balay 
2213eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2223eda8832SBarry Smith   ierr = PetscFree(atmp);CHKERRQ(ierr);
2233eda8832SBarry Smith #endif
224bba1ac68SSatish Balay 
225bba1ac68SSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
226606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
227606d414cSSatish Balay   baij->garray = 0;
228*52e6d16bSBarry Smith   ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr);
2298016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
230*52e6d16bSBarry Smith   ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr);
231d9653453SSatish Balay   baij->B = Bnew;
2328016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2333a40ed3dSBarry Smith   PetscFunctionReturn(0);
2348016bdd1SSatish Balay }
2358016bdd1SSatish Balay 
23604f1ad80SBarry Smith /*      ugly stuff added for Glenn someday we should fix this up */
23704f1ad80SBarry Smith 
238b24ad042SBarry Smith static PetscInt *uglyrmapd = 0,*uglyrmapo = 0;  /* mapping from the local ordering to the "diagonal" and "off-diagonal"
23904f1ad80SBarry Smith                                       parts of the local matrix */
2402cd6534aSBarry Smith static Vec uglydd = 0,uglyoo = 0;   /* work vectors used to scale the two parts of the local matrix */
24104f1ad80SBarry Smith 
24204f1ad80SBarry Smith 
243348d1a9dSSatish Balay #undef __FUNCT__
244348d1a9dSSatish Balay #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocalSetUp"
245dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale)
24604f1ad80SBarry Smith {
24704f1ad80SBarry Smith   Mat_MPIBAIJ    *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */
24804f1ad80SBarry Smith   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)ina->B->data;
249dfbe8321SBarry Smith   PetscErrorCode ierr;
250521d7252SBarry Smith   PetscInt       bs = inA->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices;
251b24ad042SBarry Smith   PetscInt       *r_rmapd,*r_rmapo;
25204f1ad80SBarry Smith 
25304f1ad80SBarry Smith   PetscFunctionBegin;
25404f1ad80SBarry Smith   ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr);
25504f1ad80SBarry Smith   ierr = MatGetSize(ina->A,PETSC_NULL,&n);CHKERRQ(ierr);
256b24ad042SBarry Smith   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapd);CHKERRQ(ierr);
257b24ad042SBarry Smith   ierr = PetscMemzero(r_rmapd,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
25804f1ad80SBarry Smith   nt   = 0;
25904f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
26004f1ad80SBarry Smith     if (inA->bmapping->indices[i]*bs >= cstart && inA->bmapping->indices[i]*bs < cend) {
26104f1ad80SBarry Smith       nt++;
26204f1ad80SBarry Smith       r_rmapd[i] = inA->bmapping->indices[i] + 1;
26304f1ad80SBarry Smith     }
26404f1ad80SBarry Smith   }
26577431f27SBarry Smith   if (nt*bs != n) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n);
266b24ad042SBarry Smith   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&uglyrmapd);CHKERRQ(ierr);
26704f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
26804f1ad80SBarry Smith     if (r_rmapd[i]){
26904f1ad80SBarry Smith       for (j=0; j<bs; j++) {
27004f1ad80SBarry Smith         uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j;
27104f1ad80SBarry Smith       }
27204f1ad80SBarry Smith     }
27304f1ad80SBarry Smith   }
27404f1ad80SBarry Smith   ierr = PetscFree(r_rmapd);CHKERRQ(ierr);
27504f1ad80SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr);
27604f1ad80SBarry Smith 
277b24ad042SBarry Smith   ierr = PetscMalloc((ina->Nbs+1)*sizeof(PetscInt),&lindices);CHKERRQ(ierr);
278b24ad042SBarry Smith   ierr = PetscMemzero(lindices,ina->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
27904f1ad80SBarry Smith   for (i=0; i<B->nbs; i++) {
28004f1ad80SBarry Smith     lindices[garray[i]] = i+1;
28104f1ad80SBarry Smith   }
28204f1ad80SBarry Smith   no   = inA->bmapping->n - nt;
283b24ad042SBarry Smith   ierr = PetscMalloc((inA->bmapping->n+1)*sizeof(PetscInt),&r_rmapo);CHKERRQ(ierr);
284b24ad042SBarry Smith   ierr = PetscMemzero(r_rmapo,inA->bmapping->n*sizeof(PetscInt));CHKERRQ(ierr);
28504f1ad80SBarry Smith   nt   = 0;
28604f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
28704f1ad80SBarry Smith     if (lindices[inA->bmapping->indices[i]]) {
28804f1ad80SBarry Smith       nt++;
28904f1ad80SBarry Smith       r_rmapo[i] = lindices[inA->bmapping->indices[i]];
29004f1ad80SBarry Smith     }
29104f1ad80SBarry Smith   }
29277431f27SBarry Smith   if (nt > no) SETERRQ2(PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n);
29304f1ad80SBarry Smith   ierr = PetscFree(lindices);CHKERRQ(ierr);
294b24ad042SBarry Smith   ierr = PetscMalloc((nt*bs+1)*sizeof(PetscInt),&uglyrmapo);CHKERRQ(ierr);
29504f1ad80SBarry Smith   for (i=0; i<inA->bmapping->n; i++) {
29604f1ad80SBarry Smith     if (r_rmapo[i]){
29704f1ad80SBarry Smith       for (j=0; j<bs; j++) {
29804f1ad80SBarry Smith         uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j;
29904f1ad80SBarry Smith       }
30004f1ad80SBarry Smith     }
30104f1ad80SBarry Smith   }
30204f1ad80SBarry Smith   ierr = PetscFree(r_rmapo);CHKERRQ(ierr);
30304f1ad80SBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr);
30404f1ad80SBarry Smith 
30504f1ad80SBarry Smith   PetscFunctionReturn(0);
30604f1ad80SBarry Smith }
30704f1ad80SBarry Smith 
308348d1a9dSSatish Balay #undef __FUNCT__
309348d1a9dSSatish Balay #define __FUNCT__ "MatMPIBAIJDiagonalScaleLocal"
310dfbe8321SBarry Smith PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale)
31104f1ad80SBarry Smith {
31292b32695SKris Buschelman   /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */
313dfbe8321SBarry Smith   PetscErrorCode ierr,(*f)(Mat,Vec);
31492b32695SKris Buschelman 
31592b32695SKris Buschelman   PetscFunctionBegin;
31692b32695SKris Buschelman   ierr = PetscObjectQueryFunction((PetscObject)A,"MatDiagonalScaleLocal_C",(void (**)(void))&f);CHKERRQ(ierr);
31792b32695SKris Buschelman   if (f) {
31892b32695SKris Buschelman     ierr = (*f)(A,scale);CHKERRQ(ierr);
31992b32695SKris Buschelman   }
32092b32695SKris Buschelman   PetscFunctionReturn(0);
32192b32695SKris Buschelman }
32292b32695SKris Buschelman 
32392b32695SKris Buschelman EXTERN_C_BEGIN
32492b32695SKris Buschelman #undef __FUNCT__
32592b32695SKris Buschelman #define __FUNCT__ "MatDiagonalScaleLocal_MPIBAIJ"
326dfbe8321SBarry Smith PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale)
32792b32695SKris Buschelman {
32804f1ad80SBarry Smith   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */
329dfbe8321SBarry Smith   PetscErrorCode ierr;
330b24ad042SBarry Smith   PetscInt       n,i;
33104f1ad80SBarry Smith   PetscScalar    *d,*o,*s;
33204f1ad80SBarry Smith 
33304f1ad80SBarry Smith   PetscFunctionBegin;
3342cd6534aSBarry Smith   if (!uglyrmapd) {
3352cd6534aSBarry Smith     ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr);
3362cd6534aSBarry Smith   }
3372cd6534aSBarry Smith 
3381ebc52fbSHong Zhang   ierr = VecGetArray(scale,&s);CHKERRQ(ierr);
33904f1ad80SBarry Smith 
34004f1ad80SBarry Smith   ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr);
3411ebc52fbSHong Zhang   ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr);
34204f1ad80SBarry Smith   for (i=0; i<n; i++) {
34304f1ad80SBarry Smith     d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
34404f1ad80SBarry Smith   }
3451ebc52fbSHong Zhang   ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr);
34604f1ad80SBarry Smith   /* column scale "diagonal" portion of local matrix */
34704f1ad80SBarry Smith   ierr = MatDiagonalScale(a->A,PETSC_NULL,uglydd);CHKERRQ(ierr);
34804f1ad80SBarry Smith 
34904f1ad80SBarry Smith   ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr);
3501ebc52fbSHong Zhang   ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr);
35104f1ad80SBarry Smith   for (i=0; i<n; i++) {
35204f1ad80SBarry Smith     o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
35304f1ad80SBarry Smith   }
3541ebc52fbSHong Zhang   ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr);
3551ebc52fbSHong Zhang   ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr);
35604f1ad80SBarry Smith   /* column scale "off-diagonal" portion of local matrix */
35704f1ad80SBarry Smith   ierr = MatDiagonalScale(a->B,PETSC_NULL,uglyoo);CHKERRQ(ierr);
35804f1ad80SBarry Smith 
35904f1ad80SBarry Smith   PetscFunctionReturn(0);
36004f1ad80SBarry Smith }
36192b32695SKris Buschelman EXTERN_C_END
3628016bdd1SSatish Balay 
3633eda8832SBarry Smith 
364