xref: /petsc/src/mat/impls/sbaij/mpi/mmsbaij.c (revision 87828ca270d8140797fd4271705413c4ecfcb535)
1*87828ca2SBarry Smith /*$Id: mmsbaij.c,v 1.8 2001/03/23 23:22:26 balay Exp bsmith $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay /*
4a30f8f8cSSatish Balay    Support for the parallel SBAIJ matrix vector multiply
5a30f8f8cSSatish Balay */
6a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"
7a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
8*87828ca2SBarry Smith extern int MatSetValues_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
9a30f8f8cSSatish Balay 
104a2ae208SSatish Balay #undef __FUNCT__
114a2ae208SSatish Balay #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
12a30f8f8cSSatish Balay int MatSetUpMultiply_MPISBAIJ(Mat mat)
13a30f8f8cSSatish Balay {
14a30f8f8cSSatish Balay   Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
15a30f8f8cSSatish Balay   Mat_SeqBAIJ        *B = (Mat_SeqBAIJ*)(baij->B->data);
16a30f8f8cSSatish Balay   int                Nbs = baij->Nbs,i,j,*indices,*aj = B->j,ierr,ec = 0,*garray;
17a30f8f8cSSatish Balay   int                col,bs = baij->bs,*tmp,*stmp;
18a30f8f8cSSatish Balay   IS                 from,to;
19a30f8f8cSSatish Balay   Vec                gvec;
20a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
21a30f8f8cSSatish Balay   PetscTable         gid1_lid1;
22a30f8f8cSSatish Balay   PetscTablePosition tpos;
23a30f8f8cSSatish Balay   int                gid,lid;
24a30f8f8cSSatish Balay #endif
25a30f8f8cSSatish Balay 
26a30f8f8cSSatish Balay   PetscFunctionBegin;
27a30f8f8cSSatish Balay 
28a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
29a30f8f8cSSatish Balay   /* use a table - Mark Adams */
30a30f8f8cSSatish Balay   PetscTableCreate(B->mbs,&gid1_lid1);
31a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
32a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
33a30f8f8cSSatish Balay       int data,gid1 = aj[B->i[i]+j] + 1;
34a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&data) ;CHKERRQ(ierr);
35a30f8f8cSSatish Balay       if (!data) {
36a30f8f8cSSatish Balay         /* one based table */
37a30f8f8cSSatish Balay         ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
38a30f8f8cSSatish Balay       }
39a30f8f8cSSatish Balay     }
40a30f8f8cSSatish Balay   }
41a30f8f8cSSatish Balay   /* form array of columns we need */
42b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
43b0a32e0cSBarry Smith   ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr);
44a30f8f8cSSatish Balay   ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
45a30f8f8cSSatish Balay   while (tpos) {
46a30f8f8cSSatish Balay     ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
47a30f8f8cSSatish Balay     gid--; lid--;
48a30f8f8cSSatish Balay     garray[lid] = gid;
49a30f8f8cSSatish Balay   }
50a30f8f8cSSatish Balay   ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
51a30f8f8cSSatish Balay   /* qsort(garray, ec, sizeof(int), intcomparcarc); */
52a30f8f8cSSatish Balay   ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
53a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
54a30f8f8cSSatish Balay     ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
55a30f8f8cSSatish Balay   }
56a30f8f8cSSatish Balay   /* compact out the extra columns in B */
57a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
58a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
59a30f8f8cSSatish Balay       int gid1 = aj[B->i[i] + j] + 1;
60a30f8f8cSSatish Balay       ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
61a30f8f8cSSatish Balay       lid --;
62a30f8f8cSSatish Balay       aj[B->i[i]+j] = lid;
63a30f8f8cSSatish Balay     }
64a30f8f8cSSatish Balay   }
65a30f8f8cSSatish Balay   B->nbs     = ec;
66273d9f13SBarry Smith   baij->B->n = ec*B->bs;
67a30f8f8cSSatish Balay   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
68a30f8f8cSSatish Balay   /* Mark Adams */
69a30f8f8cSSatish Balay #else
70a30f8f8cSSatish Balay   /* For the first stab we make an array as long as the number of columns */
71a30f8f8cSSatish Balay   /* mark those columns that are in baij->B */
72b0a32e0cSBarry Smith   ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr);
73a30f8f8cSSatish Balay   ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr);
74a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
75a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
76a30f8f8cSSatish Balay       if (!indices[aj[B->i[i] + j]]) ec++;
77a30f8f8cSSatish Balay       indices[aj[B->i[i] + j] ] = 1;
78a30f8f8cSSatish Balay     }
79a30f8f8cSSatish Balay   }
80a30f8f8cSSatish Balay 
81a30f8f8cSSatish Balay   /* form array of columns we need */
82b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr);
83b0a32e0cSBarry Smith   ierr = PetscMalloc((ec*bs+1)*sizeof(int),&tmp);CHKERRQ(ierr);
84f3ef73ceSBarry Smith   ec = 0;
85f3ef73ceSBarry Smith   for (i=0; i<Nbs; i++) {
86f3ef73ceSBarry Smith     if (indices[i]) {
87f3ef73ceSBarry Smith       garray[ec++] = i;
88f3ef73ceSBarry Smith     }
89f3ef73ceSBarry Smith   }
90a30f8f8cSSatish Balay 
91a30f8f8cSSatish Balay   /* make indices now point into garray */
92a30f8f8cSSatish Balay   for (i=0; i<ec; i++) {
93a30f8f8cSSatish Balay     indices[garray[i]] = i;
94a30f8f8cSSatish Balay   }
95a30f8f8cSSatish Balay 
96a30f8f8cSSatish Balay   /* compact out the extra columns in B */
97a30f8f8cSSatish Balay   for (i=0; i<B->mbs; i++) {
98a30f8f8cSSatish Balay     for (j=0; j<B->ilen[i]; j++) {
99a30f8f8cSSatish Balay       aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
100a30f8f8cSSatish Balay     }
101a30f8f8cSSatish Balay   }
102a30f8f8cSSatish Balay   B->nbs       = ec;
103273d9f13SBarry Smith   baij->B->n   = ec*B->bs;
104a30f8f8cSSatish Balay   ierr = PetscFree(indices);CHKERRQ(ierr);
105a30f8f8cSSatish Balay #endif
106a30f8f8cSSatish Balay 
107a30f8f8cSSatish Balay   for (i=0,col=0; i<ec; i++) {
108a30f8f8cSSatish Balay     for (j=0; j<bs; j++,col++) tmp[col] = garray[i]*bs+j;
109a30f8f8cSSatish Balay   }
110a30f8f8cSSatish Balay   /* create local vector that is used to scatter into */
111a30f8f8cSSatish Balay   ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);
112a30f8f8cSSatish Balay 
113a30f8f8cSSatish Balay   /* create two temporary index sets for building scatter-gather */
114a30f8f8cSSatish Balay 
115a30f8f8cSSatish Balay   /* ierr = ISCreateGeneral(PETSC_COMM_SELF,ec*bs,tmp,&from);CHKERRQ(ierr); */
116eb7adc28SSatish Balay   for (i=0; i<ec; i++) {
117a30f8f8cSSatish Balay     garray[i] = bs*garray[i];
118a30f8f8cSSatish Balay   }
119a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
120a30f8f8cSSatish Balay   for (i=0,col=0; i<ec; i++) {
121a30f8f8cSSatish Balay     garray[i] = garray[i]/bs;
122a30f8f8cSSatish Balay   }
123a30f8f8cSSatish Balay 
124b0a32e0cSBarry Smith   ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr);
125a30f8f8cSSatish Balay   for (i=0; i<ec; i++) { stmp[i] = bs*i; }
126a30f8f8cSSatish Balay   ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
127a30f8f8cSSatish Balay   ierr = PetscFree(stmp);CHKERRQ(ierr);
128a30f8f8cSSatish Balay 
129a30f8f8cSSatish Balay   /* create temporary global vector to generate scatter context */
130a30f8f8cSSatish Balay   /* this is inefficient, but otherwise we must do either
131a30f8f8cSSatish Balay      1) save garray until the first actual scatter when the vector is known or
132a30f8f8cSSatish Balay      2) have another way of generating a scatter context without a vector.*/
133273d9f13SBarry Smith   ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
134a30f8f8cSSatish Balay 
135a30f8f8cSSatish Balay   /* gnerate the scatter context */
136a30f8f8cSSatish Balay   ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
137a30f8f8cSSatish Balay 
138a30f8f8cSSatish Balay   /*
139a30f8f8cSSatish Balay       Post the receives for the first matrix vector product. We sync-chronize after
140a30f8f8cSSatish Balay     this on the chance that the user immediately calls MatMult() after assemblying
141a30f8f8cSSatish Balay     the matrix.
142a30f8f8cSSatish Balay   */
143a30f8f8cSSatish Balay   ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
144a30f8f8cSSatish Balay   ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);
145a30f8f8cSSatish Balay 
146b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->Mvctx);
147b0a32e0cSBarry Smith   PetscLogObjectParent(mat,baij->lvec);
148b0a32e0cSBarry Smith   PetscLogObjectParent(mat,from);
149b0a32e0cSBarry Smith   PetscLogObjectParent(mat,to);
150a30f8f8cSSatish Balay   baij->garray = garray;
151b0a32e0cSBarry Smith   PetscLogObjectMemory(mat,(ec+1)*sizeof(int));
152a30f8f8cSSatish Balay   ierr = ISDestroy(from);CHKERRQ(ierr);
153a30f8f8cSSatish Balay   ierr = ISDestroy(to);CHKERRQ(ierr);
154a30f8f8cSSatish Balay   ierr = VecDestroy(gvec);CHKERRQ(ierr);
155a30f8f8cSSatish Balay   ierr = PetscFree(tmp);CHKERRQ(ierr);
156a30f8f8cSSatish Balay   PetscFunctionReturn(0);
157a30f8f8cSSatish Balay }
158a30f8f8cSSatish Balay 
159a30f8f8cSSatish Balay 
160a30f8f8cSSatish Balay /*
161a30f8f8cSSatish Balay      Takes the local part of an already assembled MPIBAIJ matrix
162a30f8f8cSSatish Balay    and disassembles it. This is to allow new nonzeros into the matrix
163a30f8f8cSSatish Balay    that require more communication in the matrix vector multiply.
164a30f8f8cSSatish Balay    Thus certain data-structures must be rebuilt.
165a30f8f8cSSatish Balay 
166a30f8f8cSSatish Balay    Kind of slow! But that's what application programmers get when
167a30f8f8cSSatish Balay    they are sloppy.
168a30f8f8cSSatish Balay */
1694a2ae208SSatish Balay #undef __FUNCT__
1704a2ae208SSatish Balay #define __FUNCT__ "DisAssemble_MPISBAIJ"
171a30f8f8cSSatish Balay int DisAssemble_MPISBAIJ(Mat A)
172a30f8f8cSSatish Balay {
173a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
174a30f8f8cSSatish Balay   Mat         B = baij->B,Bnew;
175a30f8f8cSSatish Balay   Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data;
176273d9f13SBarry Smith   int         ierr,i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray;
177273d9f13SBarry Smith   int         k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m;
178a30f8f8cSSatish Balay   MatScalar   *a = Bbaij->a;
179*87828ca2SBarry Smith   PetscScalar      *atmp;
18082502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
18182502324SSatish Balay   int         l;
182a30f8f8cSSatish Balay #endif
183a30f8f8cSSatish Balay 
184a30f8f8cSSatish Balay   PetscFunctionBegin;
18582502324SSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
186*87828ca2SBarry Smith   ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp);
18782502324SSatish Balay #endif
188a30f8f8cSSatish Balay   /* free stuff related to matrix-vec multiply */
189b0a32e0cSBarry Smith   ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */
190a30f8f8cSSatish Balay   ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0;
191a30f8f8cSSatish Balay   ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0;
192a30f8f8cSSatish Balay   if (baij->colmap) {
193a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
194a30f8f8cSSatish Balay     ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr);
195a30f8f8cSSatish Balay #else
196a30f8f8cSSatish Balay     ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
197a30f8f8cSSatish Balay     baij->colmap = 0;
198b0a32e0cSBarry Smith     PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int));
199a30f8f8cSSatish Balay #endif
200a30f8f8cSSatish Balay   }
201a30f8f8cSSatish Balay 
202a30f8f8cSSatish Balay   /* make sure that B is assembled so we can access its values */
203a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
204a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
205a30f8f8cSSatish Balay 
206a30f8f8cSSatish Balay   /* invent new B and copy stuff over */
20782502324SSatish Balay   ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr);
208a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
209a30f8f8cSSatish Balay     nz[i] = Bbaij->i[i+1]-Bbaij->i[i];
210a30f8f8cSSatish Balay   }
211a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,baij->bs,m,n,0,nz,&Bnew);CHKERRQ(ierr);
212a30f8f8cSSatish Balay   ierr = PetscFree(nz);CHKERRQ(ierr);
213a30f8f8cSSatish Balay 
21482502324SSatish Balay   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
215a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
216a30f8f8cSSatish Balay     rvals[0] = bs*i;
217a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
218a30f8f8cSSatish Balay     for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) {
219a30f8f8cSSatish Balay       col = garray[Bbaij->j[j]]*bs;
220a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
221a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
222a30f8f8cSSatish Balay         for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l];
223a30f8f8cSSatish Balay #else
224a30f8f8cSSatish Balay         atmp = a+j*bs2;
225a30f8f8cSSatish Balay #endif
226c8407628SSatish Balay         ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr);
227a30f8f8cSSatish Balay         col++;
228a30f8f8cSSatish Balay       }
229a30f8f8cSSatish Balay     }
230a30f8f8cSSatish Balay   }
231a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
232a30f8f8cSSatish Balay   ierr = PetscFree(atmp);CHKERRQ(ierr);
233a30f8f8cSSatish Balay #endif
234a30f8f8cSSatish Balay   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
235a30f8f8cSSatish Balay   baij->garray = 0;
236a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
237b0a32e0cSBarry Smith   PetscLogObjectMemory(A,-ec*sizeof(int));
238a30f8f8cSSatish Balay   ierr = MatDestroy(B);CHKERRQ(ierr);
239b0a32e0cSBarry Smith   PetscLogObjectParent(A,Bnew);
240a30f8f8cSSatish Balay   baij->B = Bnew;
241a30f8f8cSSatish Balay   A->was_assembled = PETSC_FALSE;
242a30f8f8cSSatish Balay   PetscFunctionReturn(0);
243a30f8f8cSSatish Balay }
244a30f8f8cSSatish Balay 
245a30f8f8cSSatish Balay 
246a30f8f8cSSatish Balay 
247