xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 633e10c7d6abfd6941821e5537b00e6ee3e3ec0a)
173f4d377SMatthew Knepley /*$Id: mmsbaij.c,v 1.10 2001/08/07 03:03:05 balay Exp $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay /*
4a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
5a30f8f8cSSatish Balay */
62896befeSSatish Balay #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
887828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9a30f8f8cSSatish Balay 
104a2ae208SSatish Balay #undef __FUNCT__
114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat)
13a30f8f8cSSatish Balay {
142896befeSSatish Balay   Mat_MPISBAIJ       *baij = (Mat_MPISBAIJ*)mat->data;
15a30f8f8cSSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16a30f8f8cSSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
1786e15631SSatish Balay   int                bs = baij->bs,*stmp;
18a30f8f8cSSatish Balay   IS                 from,to;
19a30f8f8cSSatish Balay   Vec                gvec;
20a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
21a30f8f8cSSatish Balay   PetscTable         gid1_lid1;
22a30f8f8cSSatish Balay   PetscTablePosition tpos;
23a30f8f8cSSatish Balay   int                gid,lid;
24a30f8f8cSSatish Balay #endif
25a30f8f8cSSatish Balay 
26a30f8f8cSSatish Balay   PetscFunctionBegin;
27a30f8f8cSSatish Balay 
28a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
29a30f8f8cSSatish Balay   /* use a table - Mark Adams */
30a30f8f8cSSatish Balay   PetscTableCreate(B->mbs,&gid1_lid1);
31a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
32a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
33a30f8f8cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
34a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35a30f8f8cSSatish Balay       if (!data) {
36a30f8f8cSSatish Balay         /* one based table */
37a30f8f8cSSatish Balay         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38a30f8f8cSSatish Balay       }
39a30f8f8cSSatish Balay     }
40a30f8f8cSSatish Balay   }
41a30f8f8cSSatish Balay   /* form array of columns we need */
42b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43a30f8f8cSSatish Balay   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
44a30f8f8cSSatish Balay   while (tpos) {
45a30f8f8cSSatish Balay     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
46a30f8f8cSSatish Balay     gid--; lid--;
47a30f8f8cSSatish Balay     garray[lid] = gid;
48a30f8f8cSSatish Balay   }
49a30f8f8cSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
50a30f8f8cSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
51a30f8f8cSSatish Balay   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
52a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
53a30f8f8cSSatish Balay     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
54a30f8f8cSSatish Balay   }
55a30f8f8cSSatish Balay   /* compact out the extra columns in B */
56a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
57a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
58a30f8f8cSSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
59a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
60a30f8f8cSSatish Balay       lid --;
61a30f8f8cSSatish Balay       aj[B->i[i]+j] = lid;
62a30f8f8cSSatish Balay     }
63a30f8f8cSSatish Balay   }
64a30f8f8cSSatish Balay   B->nbs     = ec;
65273d9f13SBarry Smith   baij->B->n = ec*B->bs;
66a30f8f8cSSatish Balay   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
67a30f8f8cSSatish Balay   /* Mark Adams */
68a30f8f8cSSatish Balay #else
69a30f8f8cSSatish Balay   /* For the first stab we make an array as long as the number of columns */
70a30f8f8cSSatish Balay   /* mark those columns that are in baij->B */
71b0a32e0cSBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
72a30f8f8cSSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
73a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
74a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
75a30f8f8cSSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
76a30f8f8cSSatish Balay       indices[aj[B->i[i] + j] ] = 1;
77a30f8f8cSSatish Balay     }
78a30f8f8cSSatish Balay   }
79a30f8f8cSSatish Balay 
80a30f8f8cSSatish Balay   /* form array of columns we need */
81b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
82f3ef73ceSBarry Smith   ec = 0;
83f3ef73ceSBarry Smith   for (i=0; i<Nbs; i++) {
84f3ef73ceSBarry Smith     if (indices[i]) {
85f3ef73ceSBarry Smith       garray[ec++] = i;
86f3ef73ceSBarry Smith     }
87f3ef73ceSBarry Smith   }
88a30f8f8cSSatish Balay 
89a30f8f8cSSatish Balay   /* make indices now point into garray */
90a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
91a30f8f8cSSatish Balay     indices[garray[i]] = i;
92a30f8f8cSSatish Balay   }
93a30f8f8cSSatish Balay 
94a30f8f8cSSatish Balay   /* compact out the extra columns in B */
95a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
96a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
97a30f8f8cSSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
98a30f8f8cSSatish Balay     }
99a30f8f8cSSatish Balay   }
100a30f8f8cSSatish Balay   B->nbs       = ec;
101273d9f13SBarry Smith   baij->B->n   = ec*B->bs;
102a30f8f8cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
103a30f8f8cSSatish Balay #endif
104*633e10c7SHong Zhang 
105a30f8f8cSSatish Balay   /* create local vector that is used to scatter into */
106a30f8f8cSSatish Balay   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
107a30f8f8cSSatish Balay 
108a30f8f8cSSatish Balay   /* create two temporary index sets for building scatter-gather */
109eb7adc28SSatish Balay   for (i=0; i<ec; i++) {
110a30f8f8cSSatish Balay     garray[i] = bs*garray[i];
111a30f8f8cSSatish Balay   }
112a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
11386e15631SSatish Balay   for (i=0; i<ec; i++) {
114a30f8f8cSSatish Balay     garray[i] = garray[i]/bs;
115a30f8f8cSSatish Balay   }
116a30f8f8cSSatish Balay 
117b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
118a30f8f8cSSatish Balay   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
119a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
120a30f8f8cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
121a30f8f8cSSatish Balay 
122a30f8f8cSSatish Balay   /* create temporary global vector to generate scatter context */
123a30f8f8cSSatish Balay   /* this is inefficient, but otherwise we must do either
124a30f8f8cSSatish Balay      1) save garray until the first actual scatter when the vector is known or
125a30f8f8cSSatish Balay      2) have another way of generating a scatter context without a vector.*/
126273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
127a30f8f8cSSatish Balay 
128a30f8f8cSSatish Balay   /* gnerate the scatter context */
129a30f8f8cSSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
130a30f8f8cSSatish Balay 
131a30f8f8cSSatish Balay   /*
132a30f8f8cSSatish Balay       Post the receives for the first matrix vector product. We sync-chronize after
133a30f8f8cSSatish Balay     this on the chance that the user immediately calls MatMult() after assemblying
134a30f8f8cSSatish Balay     the matrix.
135a30f8f8cSSatish Balay   */
136a30f8f8cSSatish Balay   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
137a30f8f8cSSatish Balay   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
138a30f8f8cSSatish Balay 
139b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->Mvctx);
140b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->lvec);
141b0a32e0cSBarry Smith   PetscLogObjectParent(mat,from);
142b0a32e0cSBarry Smith   PetscLogObjectParent(mat,to);
143a30f8f8cSSatish Balay   baij->garray = garray;
144b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
145a30f8f8cSSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
146a30f8f8cSSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
147a30f8f8cSSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
148a30f8f8cSSatish Balay   PetscFunctionReturn(0);
149a30f8f8cSSatish Balay }
150a30f8f8cSSatish Balay 
151a30f8f8cSSatish Balay 
152a30f8f8cSSatish Balay /*
153a30f8f8cSSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
154a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
155a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
156a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
157a30f8f8cSSatish Balay 
158a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
159a30f8f8cSSatish Balay    they are sloppy.
160a30f8f8cSSatish Balay */
1614a2ae208SSatish Balay #undef __FUNCT__
1624a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ"
163a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A)
164a30f8f8cSSatish Balay {
1652896befeSSatish Balay   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)A->data;
166a30f8f8cSSatish Balay   Mat           B = baij->B,Bnew;
167a30f8f8cSSatish Balay   Mat_SeqBAIJ   *Bbaij = (Mat_SeqBAIJ*)B->data;
168273d9f13SBarry Smith   int           ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
169273d9f13SBarry Smith   int           k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
170a30f8f8cSSatish Balay   MatScalar     *a = Bbaij->a;
17187828ca2SBarry Smith   PetscScalar   *atmp;
17282502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
17382502324SSatish Balay   int           l;
174a30f8f8cSSatish Balay #endif
175a30f8f8cSSatish Balay 
176a30f8f8cSSatish Balay   PetscFunctionBegin;
17782502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
17887828ca2SBarry Smith   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
17982502324SSatish Balay #endif
180a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
181b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
182a30f8f8cSSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
183a30f8f8cSSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
184a30f8f8cSSatish Balay   if (baij->colmap) {
185a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
186a30f8f8cSSatish Balay     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
187a30f8f8cSSatish Balay #else
188a30f8f8cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
189a30f8f8cSSatish Balay     baij->colmap = 0;
190b0a32e0cSBarry Smith     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
191a30f8f8cSSatish Balay #endif
192a30f8f8cSSatish Balay   }
193a30f8f8cSSatish Balay 
194a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
195a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
196a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
197a30f8f8cSSatish Balay 
198a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
19982502324SSatish Balay   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
200a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
201a30f8f8cSSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
202a30f8f8cSSatish Balay   }
203a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
204a30f8f8cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
205a30f8f8cSSatish Balay 
20682502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
207a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
208a30f8f8cSSatish Balay     rvals[0] = bs*i;
209a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
210a30f8f8cSSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
211a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]]*bs;
212a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
213a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
214a30f8f8cSSatish Balay         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
215a30f8f8cSSatish Balay #else
21671730473SSatish Balay         atmp = a+j*bs2 + k*bs;
217a30f8f8cSSatish Balay #endif
218c8407628SSatish Balay         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
219a30f8f8cSSatish Balay         col++;
220a30f8f8cSSatish Balay       }
221a30f8f8cSSatish Balay     }
222a30f8f8cSSatish Balay   }
223a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
224a30f8f8cSSatish Balay   ierr = PetscFree(atmp);CHKERRQ(ierr);
225a30f8f8cSSatish Balay #endif
226a30f8f8cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
227a30f8f8cSSatish Balay   baij->garray = 0;
228a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
229b0a32e0cSBarry Smith   PetscLogObjectMemory(A,-ec*sizeof(int));
230a30f8f8cSSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
231b0a32e0cSBarry Smith   PetscLogObjectParent(A,Bnew);
232a30f8f8cSSatish Balay   baij->B = Bnew;
233a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
234a30f8f8cSSatish Balay   PetscFunctionReturn(0);
235a30f8f8cSSatish Balay }
236a30f8f8cSSatish Balay 
237a30f8f8cSSatish Balay 
238a30f8f8cSSatish Balay 
239