xref: /petsc/src/mat/impls/baij/mpi/mmbaij.c (revision b0a32e0c6855ee6a6cd3495fa7da12ea9885bc5d)
1*b0a32e0cSBarry Smith /*$Id: mmbaij.c,v 1.36 2000/10/24 20:25:55 bsmith Exp bsmith $*/
28016bdd1SSatish Balay 
38016bdd1SSatish Balay /*
4d9653453SSatish Balay    Support for the parallel BAIJ matrix vector multiply
58016bdd1SSatish Balay */
670f55243SBarry Smith #include "src/mat/impls/baij/mpi/mpibaij.h"
78016bdd1SSatish Balay #include "src/vec/vecimpl.h"
8ca44d042SBarry Smith EXTERN int MatSetValues_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
98016bdd1SSatish Balay 
105615d1e5SSatish Balay #undef __FUNC__
11*b0a32e0cSBarry Smith #define __FUNC__ "MatSetUpMultiply_MPIBAIJ"
12d9653453SSatish Balay int MatSetUpMultiply_MPIBAIJ(Mat mat)
138016bdd1SSatish Balay {
14d9653453SSatish Balay   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15d9653453SSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16d9653453SSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
173d3cf644SBarry Smith   int                bs = baij->bs,*stmp;
188016bdd1SSatish Balay   IS                 from,to;
198016bdd1SSatish Balay   Vec                gvec;
20aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
210f5bd95cSBarry Smith   PetscTable         gid1_lid1;
220f5bd95cSBarry Smith   PetscTablePosition tpos;
2373a2e727SSatish Balay   int                gid,lid;
2473a2e727SSatish Balay #endif
258016bdd1SSatish Balay 
263a40ed3dSBarry Smith   PetscFunctionBegin;
2773a2e727SSatish Balay 
28aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
2973a2e727SSatish Balay   /* use a table - Mark Adams */
30273d9f13SBarry Smith   ierr = PetscTableCreate(B->mbs,&gid1_lid1);CHKERRQ(ierr);
3173a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
3273a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
33fa46199cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
340f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35fa46199cSSatish Balay       if (!data) {
3673a2e727SSatish Balay         /* one based table */
370f5bd95cSBarry Smith         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
3873a2e727SSatish Balay       }
3973a2e727SSatish Balay     }
4073a2e727SSatish Balay   }
4173a2e727SSatish Balay   /* form array of columns we need */
42*b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&  garray );CHKERRQ(ierr);
430f5bd95cSBarry Smith   ierr   = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
4473a2e727SSatish Balay   while (tpos) {
450f5bd95cSBarry Smith     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
4673a2e727SSatish Balay     gid--; lid--;
4773a2e727SSatish Balay     garray[lid] = gid;
4873a2e727SSatish Balay   }
490064e2bbSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
500064e2bbSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
510f5bd95cSBarry Smith   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
5273a2e727SSatish Balay   for (i=0; i<ec; i++) {
530f5bd95cSBarry Smith     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
5473a2e727SSatish Balay   }
5573a2e727SSatish Balay   /* compact out the extra columns in B */
5673a2e727SSatish Balay   for (i=0; i<B->mbs; i++) {
5773a2e727SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
5873a2e727SSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
590f5bd95cSBarry Smith       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60fa46199cSSatish Balay       lid --;
6173a2e727SSatish Balay       aj[B->i[i]+j] = lid;
6273a2e727SSatish Balay     }
6373a2e727SSatish Balay   }
6473a2e727SSatish Balay   B->nbs     = ec;
65273d9f13SBarry Smith   baij->B->n = ec*B->bs;
660f5bd95cSBarry Smith   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
6773a2e727SSatish Balay   /* Mark Adams */
6873a2e727SSatish Balay #else
698016bdd1SSatish Balay   /* For the first stab we make an array as long as the number of columns */
70d9653453SSatish Balay   /* mark those columns that are in baij->B */
71*b0a32e0cSBarry Smith ierr = PetscMalloc((Nbs+1)*sizeof(int),&  indices );CHKERRQ(ierr);
72549d3d68SSatish Balay   ierr    = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
73d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
748016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
75d9653453SSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
76d9653453SSatish Balay       indices[aj[B->i[i] + j]] = 1;
778016bdd1SSatish Balay     }
788016bdd1SSatish Balay   }
798016bdd1SSatish Balay 
808016bdd1SSatish Balay   /* form array of columns we need */
81*b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&  garray );CHKERRQ(ierr);
828016bdd1SSatish Balay   ec = 0;
83d9653453SSatish Balay   for (i=0; i<Nbs; i++) {
840bdbc534SSatish Balay     if (indices[i]) {
850bdbc534SSatish Balay       garray[ec++] = i;
860bdbc534SSatish Balay     }
878016bdd1SSatish Balay   }
888016bdd1SSatish Balay 
898016bdd1SSatish Balay   /* make indices now point into garray */
908016bdd1SSatish Balay   for (i=0; i<ec; i++) {
91d9653453SSatish Balay     indices[garray[i]] = i;
928016bdd1SSatish Balay   }
938016bdd1SSatish Balay 
948016bdd1SSatish Balay   /* compact out the extra columns in B */
95d9653453SSatish Balay   for (i=0; i<B->mbs; i++) {
968016bdd1SSatish Balay     for (j=0; j<B->ilen[i]; j++) {
97d9653453SSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
988016bdd1SSatish Balay     }
998016bdd1SSatish Balay   }
100d9653453SSatish Balay   B->nbs = ec;
101273d9f13SBarry Smith   baij->B->n   = ec*B->bs;
102606d414cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
10373a2e727SSatish Balay #endif
1048016bdd1SSatish Balay 
1058016bdd1SSatish Balay   /* create local vector that is used to scatter into */
106029af93fSBarry Smith   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
1078016bdd1SSatish Balay 
108c16cb8f2SBarry Smith   /* create two temporary index sets for building scatter-gather */
1093d3cf644SBarry Smith   for (i=0; i<ec; i++) {
110c16cb8f2SBarry Smith     garray[i] = bs*garray[i];
111c16cb8f2SBarry Smith   }
112029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
1133d3cf644SBarry Smith   for (i=0; i<ec; i++) {
114c16cb8f2SBarry Smith     garray[i] = garray[i]/bs;
115c16cb8f2SBarry Smith   }
116c16cb8f2SBarry Smith 
117*b0a32e0cSBarry Smith ierr = PetscMalloc((ec+1)*sizeof(int),&  stmp );CHKERRQ(ierr);
118537820f0SBarry Smith   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
119029af93fSBarry Smith   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
120606d414cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
1218016bdd1SSatish Balay 
1228016bdd1SSatish Balay   /* create temporary global vector to generate scatter context */
1238016bdd1SSatish Balay   /* this is inefficient, but otherwise we must do either
1248016bdd1SSatish Balay      1) save garray until the first actual scatter when the vector is known or
1258016bdd1SSatish Balay      2) have another way of generating a scatter context without a vector.*/
126273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
1278016bdd1SSatish Balay 
1288016bdd1SSatish Balay   /* gnerate the scatter context */
129d9653453SSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
13090f02eecSBarry Smith 
13190f02eecSBarry Smith   /*
13290f02eecSBarry Smith       Post the receives for the first matrix vector product. We sync-chronize after
13390f02eecSBarry Smith     this on the chance that the user immediately calls MatMult() after assemblying
13490f02eecSBarry Smith     the matrix.
13590f02eecSBarry Smith   */
13643a90d84SBarry Smith   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
137ca161407SBarry Smith   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
13890f02eecSBarry Smith 
139*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->Mvctx);
140*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->lvec);
141*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,from);
142*b0a32e0cSBarry Smith   PetscLogObjectParent(mat,to);
143d9653453SSatish Balay   baij->garray = garray;
144*b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
1458016bdd1SSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
1468016bdd1SSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
147888f2ed8SSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
1483a40ed3dSBarry Smith   PetscFunctionReturn(0);
1498016bdd1SSatish Balay }
1508016bdd1SSatish Balay 
1518016bdd1SSatish Balay 
1528016bdd1SSatish Balay /*
153d9653453SSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
1548016bdd1SSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
1558016bdd1SSatish Balay    that require more communication in the matrix vector multiply.
1568016bdd1SSatish Balay    Thus certain data-structures must be rebuilt.
1578016bdd1SSatish Balay 
1588016bdd1SSatish Balay    Kind of slow! But that's what application programmers get when
1598016bdd1SSatish Balay    they are sloppy.
1608016bdd1SSatish Balay */
1615615d1e5SSatish Balay #undef __FUNC__
162*b0a32e0cSBarry Smith #define __FUNC__ "DisAssemble_MPIBAIJ"
163d9653453SSatish Balay int DisAssemble_MPIBAIJ(Mat A)
1648016bdd1SSatish Balay {
165d9653453SSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
166d9653453SSatish Balay   Mat         B = baij->B,Bnew;
167d9653453SSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
168273d9f13SBarry Smith   int         ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
169273d9f13SBarry Smith   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m = A->m;
1703eda8832SBarry Smith   MatScalar   *a = Bbaij->a;
1713eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
1723eda8832SBarry Smith   Scalar      *atmp = (Scalar*)PetscMalloc(baij->bs*sizeof(Scalar));
1733eda8832SBarry Smith   int         l;
17495d5f7c2SBarry Smith #else
17595d5f7c2SBarry Smith   Scalar      *atmp;
1763eda8832SBarry Smith #endif
1778016bdd1SSatish Balay 
1783a40ed3dSBarry Smith   PetscFunctionBegin;
1798016bdd1SSatish Balay   /* free stuff related to matrix-vec multiply */
180*b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
181d9653453SSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
182d9653453SSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
183d9653453SSatish Balay   if (baij->colmap) {
184aa482453SBarry Smith #if defined (PETSC_USE_CTABLE)
1850f5bd95cSBarry Smith     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
18648e59246SSatish Balay #else
187606d414cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
188606d414cSSatish Balay     baij->colmap = 0;
189*b0a32e0cSBarry Smith     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
19048e59246SSatish Balay #endif
1918016bdd1SSatish Balay   }
1928016bdd1SSatish Balay 
1938016bdd1SSatish Balay   /* make sure that B is assembled so we can access its values */
1946d4a8577SBarry Smith   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953eda8832SBarry Smith   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968016bdd1SSatish Balay 
1978016bdd1SSatish Balay   /* invent new B and copy stuff over */
198*b0a32e0cSBarry Smith ierr = PetscMalloc(mbs*sizeof(int),&(  nz ));CHKERRQ(ierr);
199d9653453SSatish Balay   for (i=0; i<mbs; i++) {
200d9653453SSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
2018016bdd1SSatish Balay   }
202029af93fSBarry Smith   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
203606d414cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
204d9653453SSatish Balay 
205*b0a32e0cSBarry Smith ierr = PetscMalloc(bs*sizeof(int),&(  rvals ));CHKERRQ(ierr);
206d9653453SSatish Balay   for (i=0; i<mbs; i++) {
207d9653453SSatish Balay     rvals[0] = bs*i;
208d9653453SSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
209d9653453SSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
21057b952d6SSatish Balay       col = garray[Bbaij->j[j]]*bs;
211d9653453SSatish Balay       for (k=0; k<bs; k++) {
2123eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2133eda8832SBarry Smith         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
2143eda8832SBarry Smith #else
2153eda8832SBarry Smith         atmp = a+j*bs2;
2163eda8832SBarry Smith #endif
21795d5f7c2SBarry Smith         ierr = MatSetValues_SeqBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
218d9653453SSatish Balay         col++;
2198016bdd1SSatish Balay       }
2208016bdd1SSatish Balay     }
221d9653453SSatish Balay   }
2223eda8832SBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
2233eda8832SBarry Smith   ierr = PetscFree(atmp);CHKERRQ(ierr);
2243eda8832SBarry Smith #endif
225606d414cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
226606d414cSSatish Balay   baij->garray = 0;
227606d414cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
228*b0a32e0cSBarry Smith   PetscLogObjectMemory(A,-ec*sizeof(int));
2298016bdd1SSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
230*b0a32e0cSBarry Smith   PetscLogObjectParent(A,Bnew);
231d9653453SSatish Balay   baij->B = Bnew;
2328016bdd1SSatish Balay   A->was_assembled = PETSC_FALSE;
2333a40ed3dSBarry Smith   PetscFunctionReturn(0);
2348016bdd1SSatish Balay }
2358016bdd1SSatish Balay 
2368016bdd1SSatish Balay 
2373eda8832SBarry Smith 
238