xref: /petsc/src/mat/impls/sbaij/mpi/sbaijov.c (revision 632d0f974322d6bb33bc38a983f071754d158c6d)
1*632d0f97SHong Zhang /*$Id: sbaijov.c,v 1.65 2001/08/06 21:15:42 bsmith Exp $*/
2*632d0f97SHong Zhang 
3*632d0f97SHong Zhang /*
4*632d0f97SHong Zhang    Routines to compute overlapping regions of a parallel MPI matrix.
5*632d0f97SHong Zhang    Used for finding submatrices that were shared across processors.
6*632d0f97SHong Zhang */
7*632d0f97SHong Zhang #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
8*632d0f97SHong Zhang #include "petscbt.h"
9*632d0f97SHong Zhang 
10*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Once(Mat,int,IS *);
11*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Local(Mat,int,char **,int*,int**);
12*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat,int,int **,int**,int*);
13*632d0f97SHong Zhang 
14*632d0f97SHong Zhang /* this function is sasme as MatCompressIndicesGeneral_MPIBAIJ -- should be removed! */
15*632d0f97SHong Zhang #undef __FUNCT__
16*632d0f97SHong Zhang #define __FUNCT__ "MatCompressIndicesGeneral_MPISBAIJ"
17*632d0f97SHong Zhang static int MatCompressIndicesGeneral_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
18*632d0f97SHong Zhang {
19*632d0f97SHong Zhang   Mat_MPISBAIJ        *baij = (Mat_MPISBAIJ*)C->data;
20*632d0f97SHong Zhang   int                ierr,isz,bs = baij->bs,n,i,j,*idx,ival;
21*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
22*632d0f97SHong Zhang   PetscTable         gid1_lid1;
23*632d0f97SHong Zhang   int                tt, gid1, *nidx;
24*632d0f97SHong Zhang   PetscTablePosition tpos;
25*632d0f97SHong Zhang #else
26*632d0f97SHong Zhang   int                Nbs,*nidx;
27*632d0f97SHong Zhang   PetscBT            table;
28*632d0f97SHong Zhang #endif
29*632d0f97SHong Zhang 
30*632d0f97SHong Zhang   PetscFunctionBegin;
31*632d0f97SHong Zhang   /* printf(" ...MatCompressIndicesGeneral_MPISBAIJ is called ...\n"); */
32*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
33*632d0f97SHong Zhang   ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr);
34*632d0f97SHong Zhang #else
35*632d0f97SHong Zhang   Nbs  = baij->Nbs;
36*632d0f97SHong Zhang   ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
37*632d0f97SHong Zhang   ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr);
38*632d0f97SHong Zhang #endif
39*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
40*632d0f97SHong Zhang     isz  = 0;
41*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
42*632d0f97SHong Zhang     ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
43*632d0f97SHong Zhang #else
44*632d0f97SHong Zhang     ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr);
45*632d0f97SHong Zhang #endif
46*632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
47*632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
48*632d0f97SHong Zhang     for (j=0; j<n ; j++) {
49*632d0f97SHong Zhang       ival = idx[j]/bs; /* convert the indices into block indices */
50*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
51*632d0f97SHong Zhang       ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr);
52*632d0f97SHong Zhang       if (!tt) {
53*632d0f97SHong Zhang 	ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1);CHKERRQ(ierr);
54*632d0f97SHong Zhang         isz++;
55*632d0f97SHong Zhang       }
56*632d0f97SHong Zhang #else
57*632d0f97SHong Zhang       if (ival>Nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
58*632d0f97SHong Zhang       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
59*632d0f97SHong Zhang #endif
60*632d0f97SHong Zhang     }
61*632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
62*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
63*632d0f97SHong Zhang     ierr = PetscMalloc((isz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
64*632d0f97SHong Zhang     ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
65*632d0f97SHong Zhang     j = 0;
66*632d0f97SHong Zhang     while (tpos) {
67*632d0f97SHong Zhang       ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr);
68*632d0f97SHong Zhang       if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); }
69*632d0f97SHong Zhang       nidx[tt] = gid1 - 1;
70*632d0f97SHong Zhang       j++;
71*632d0f97SHong Zhang     }
72*632d0f97SHong Zhang     if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); }
73*632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
74*632d0f97SHong Zhang     ierr = PetscFree(nidx);CHKERRQ(ierr);
75*632d0f97SHong Zhang #else
76*632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
77*632d0f97SHong Zhang #endif
78*632d0f97SHong Zhang   }
79*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
80*632d0f97SHong Zhang   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
81*632d0f97SHong Zhang #else
82*632d0f97SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
83*632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
84*632d0f97SHong Zhang #endif
85*632d0f97SHong Zhang   PetscFunctionReturn(0);
86*632d0f97SHong Zhang }
87*632d0f97SHong Zhang 
88*632d0f97SHong Zhang #undef __FUNCT__
89*632d0f97SHong Zhang #define __FUNCT__ "MatCompressIndicesSorted_MPISBAIJ"
90*632d0f97SHong Zhang static int MatCompressIndicesSorted_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
91*632d0f97SHong Zhang {
92*632d0f97SHong Zhang   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)C->data;
93*632d0f97SHong Zhang   int          ierr,bs=baij->bs,i,j,k,val,n,*idx,*nidx,*idx_local;
94*632d0f97SHong Zhang   PetscTruth   flg;
95*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
96*632d0f97SHong Zhang   int maxsz;
97*632d0f97SHong Zhang #else
98*632d0f97SHong Zhang   int Nbs=baij->Nbs;
99*632d0f97SHong Zhang #endif
100*632d0f97SHong Zhang   PetscFunctionBegin;
101*632d0f97SHong Zhang   printf(" ... MatCompressIndicesSorted_MPISBAIJ is called ...\n");
102*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
103*632d0f97SHong Zhang     ierr = ISSorted(is_in[i],&flg);CHKERRQ(ierr);
104*632d0f97SHong Zhang     if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Indices are not sorted");
105*632d0f97SHong Zhang   }
106*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
107*632d0f97SHong Zhang   /* Now check max size */
108*632d0f97SHong Zhang   for (i=0,maxsz=0; i<imax; i++) {
109*632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
110*632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
111*632d0f97SHong Zhang     if (n%bs !=0) SETERRQ(1,"Indices are not block ordered");
112*632d0f97SHong Zhang     n = n/bs; /* The reduced index size */
113*632d0f97SHong Zhang     if (n > maxsz) maxsz = n;
114*632d0f97SHong Zhang   }
115*632d0f97SHong Zhang   ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
116*632d0f97SHong Zhang #else
117*632d0f97SHong Zhang   ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
118*632d0f97SHong Zhang #endif
119*632d0f97SHong Zhang   /* Now check if the indices are in block order */
120*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
121*632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
122*632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
123*632d0f97SHong Zhang     if (n%bs !=0) SETERRQ(1,"Indices are not block ordered");
124*632d0f97SHong Zhang 
125*632d0f97SHong Zhang     n = n/bs; /* The reduced index size */
126*632d0f97SHong Zhang     idx_local = idx;
127*632d0f97SHong Zhang     for (j=0; j<n ; j++) {
128*632d0f97SHong Zhang       val = idx_local[0];
129*632d0f97SHong Zhang       if (val%bs != 0) SETERRQ(1,"Indices are not block ordered");
130*632d0f97SHong Zhang       for (k=0; k<bs; k++) {
131*632d0f97SHong Zhang         if (val+k != idx_local[k]) SETERRQ(1,"Indices are not block ordered");
132*632d0f97SHong Zhang       }
133*632d0f97SHong Zhang       nidx[j] = val/bs;
134*632d0f97SHong Zhang       idx_local +=bs;
135*632d0f97SHong Zhang     }
136*632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
137*632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,nidx,(is_out+i));CHKERRQ(ierr);
138*632d0f97SHong Zhang   }
139*632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
140*632d0f97SHong Zhang 
141*632d0f97SHong Zhang   PetscFunctionReturn(0);
142*632d0f97SHong Zhang }
143*632d0f97SHong Zhang 
144*632d0f97SHong Zhang #undef __FUNCT__
145*632d0f97SHong Zhang #define __FUNCT__ "MatExpandIndices_MPISBAIJ"
146*632d0f97SHong Zhang static int MatExpandIndices_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
147*632d0f97SHong Zhang {
148*632d0f97SHong Zhang   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)C->data;
149*632d0f97SHong Zhang   int          ierr,bs = baij->bs,n,i,j,k,*idx,*nidx;
150*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
151*632d0f97SHong Zhang   int          maxsz;
152*632d0f97SHong Zhang #else
153*632d0f97SHong Zhang   int          Nbs = baij->Nbs;
154*632d0f97SHong Zhang #endif
155*632d0f97SHong Zhang 
156*632d0f97SHong Zhang   PetscFunctionBegin;
157*632d0f97SHong Zhang   /* printf(" ... MatExpandIndices_MPISBAIJ is called ...\n"); */
158*632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
159*632d0f97SHong Zhang   /* Now check max size */
160*632d0f97SHong Zhang   for (i=0,maxsz=0; i<imax; i++) {
161*632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
162*632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
163*632d0f97SHong Zhang     if (n*bs > maxsz) maxsz = n*bs;
164*632d0f97SHong Zhang   }
165*632d0f97SHong Zhang   ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
166*632d0f97SHong Zhang #else
167*632d0f97SHong Zhang   ierr = PetscMalloc((Nbs*bs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
168*632d0f97SHong Zhang #endif
169*632d0f97SHong Zhang 
170*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
171*632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
172*632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
173*632d0f97SHong Zhang     for (j=0; j<n ; ++j){
174*632d0f97SHong Zhang       for (k=0; k<bs; k++)
175*632d0f97SHong Zhang         nidx[j*bs+k] = idx[j]*bs+k;
176*632d0f97SHong Zhang     }
177*632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
178*632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,n*bs,nidx,is_out+i);CHKERRQ(ierr);
179*632d0f97SHong Zhang   }
180*632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
181*632d0f97SHong Zhang   PetscFunctionReturn(0);
182*632d0f97SHong Zhang }
183*632d0f97SHong Zhang 
184*632d0f97SHong Zhang 
185*632d0f97SHong Zhang #undef __FUNCT__
186*632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ"
187*632d0f97SHong Zhang int MatIncreaseOverlap_MPISBAIJ(Mat C,int imax,IS is[],int ov)
188*632d0f97SHong Zhang {
189*632d0f97SHong Zhang   int i,ierr;
190*632d0f97SHong Zhang   IS  *is_new;
191*632d0f97SHong Zhang 
192*632d0f97SHong Zhang   PetscFunctionBegin;
193*632d0f97SHong Zhang   ierr = PetscMalloc(imax*sizeof(IS),&is_new);CHKERRQ(ierr);
194*632d0f97SHong Zhang   /* Convert the indices into block format */
195*632d0f97SHong Zhang   ierr = MatCompressIndicesGeneral_MPISBAIJ(C,imax,is,is_new);CHKERRQ(ierr);
196*632d0f97SHong Zhang   if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}
197*632d0f97SHong Zhang   for (i=0; i<ov; ++i) {
198*632d0f97SHong Zhang     ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,imax,is_new);CHKERRQ(ierr);
199*632d0f97SHong Zhang   }
200*632d0f97SHong Zhang   for (i=0; i<imax; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);}
201*632d0f97SHong Zhang   ierr = MatExpandIndices_MPISBAIJ(C,imax,is_new,is);CHKERRQ(ierr);
202*632d0f97SHong Zhang   for (i=0; i<imax; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);}
203*632d0f97SHong Zhang   ierr = PetscFree(is_new);CHKERRQ(ierr);
204*632d0f97SHong Zhang   PetscFunctionReturn(0);
205*632d0f97SHong Zhang }
206*632d0f97SHong Zhang 
207*632d0f97SHong Zhang #undef __FUNCT__
208*632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once"
209*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Once(Mat C,int imax,IS is[])
210*632d0f97SHong Zhang {
211*632d0f97SHong Zhang   Mat_MPISBAIJ  *c = (Mat_MPISBAIJ*)C->data;
212*632d0f97SHong Zhang   int         **idx,*n,len,*idx_i;
213*632d0f97SHong Zhang   int         size,rank,Mbs,i,j,k,ierr,**rbuf,row,proc,nrqs,msz,*outdat,*indat;
214*632d0f97SHong Zhang   int         *onodes1,*olengths1,tag1,tag2,*onodes2,*olengths2,flag,proc_id;
215*632d0f97SHong Zhang   PetscBT     *table;
216*632d0f97SHong Zhang   MPI_Comm    comm;
217*632d0f97SHong Zhang   MPI_Request *s_waits1,*r_waits1,*s_waits2,*r_waits2;
218*632d0f97SHong Zhang   MPI_Status  *s_status,r_status;
219*632d0f97SHong Zhang 
220*632d0f97SHong Zhang   PetscFunctionBegin;
221*632d0f97SHong Zhang 
222*632d0f97SHong Zhang   comm = C->comm;
223*632d0f97SHong Zhang   size = c->size;
224*632d0f97SHong Zhang   rank = c->rank;
225*632d0f97SHong Zhang   Mbs  = c->Mbs;
226*632d0f97SHong Zhang 
227*632d0f97SHong Zhang   /* 1. Send is[] to all other processors */
228*632d0f97SHong Zhang   /*--------------------------------------*/
229*632d0f97SHong Zhang   /* This processor sends its is[] to all other processors in the format:
230*632d0f97SHong Zhang        outdat[0]          = is_max, no of is in this processor
231*632d0f97SHong Zhang        outdat[1]          = n[0], size of is[0]
232*632d0f97SHong Zhang         ...
233*632d0f97SHong Zhang        outdat[is_max]     = n[is_max-1], size of is[is_max-1]
234*632d0f97SHong Zhang        outdat[is_max + 1] = data(is[0])
235*632d0f97SHong Zhang         ...
236*632d0f97SHong Zhang        outdat[is_max + i] = data(is[i])
237*632d0f97SHong Zhang         ...
238*632d0f97SHong Zhang   */
239*632d0f97SHong Zhang    len  = (imax+1)*sizeof(int*)+ (imax)*sizeof(int);
240*632d0f97SHong Zhang    ierr = PetscMalloc(len,&idx);CHKERRQ(ierr);
241*632d0f97SHong Zhang    n    = (int*)(idx + imax);
242*632d0f97SHong Zhang 
243*632d0f97SHong Zhang   /* Allocate Memory for outgoing messages */
244*632d0f97SHong Zhang   len = 1 + imax;
245*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
246*632d0f97SHong Zhang     ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
247*632d0f97SHong Zhang     ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
248*632d0f97SHong Zhang     len += n[i];
249*632d0f97SHong Zhang   }
250*632d0f97SHong Zhang   ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr);
251*632d0f97SHong Zhang 
252*632d0f97SHong Zhang   /* Form the outgoing messages */
253*632d0f97SHong Zhang   outdat[0] = imax;
254*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
255*632d0f97SHong Zhang     outdat[i+1] = n[i];
256*632d0f97SHong Zhang   }
257*632d0f97SHong Zhang   k = imax + 1;
258*632d0f97SHong Zhang   for (i=0; i<imax; i++) { /* for is[i] */
259*632d0f97SHong Zhang     idx_i = idx[i];
260*632d0f97SHong Zhang     for (j=0; j<n[i]; j++){
261*632d0f97SHong Zhang       outdat[k] = *(idx_i);
262*632d0f97SHong Zhang       /* if (!rank) printf(" outdat[%d] = %d\n",k,outdat[k] ); */
263*632d0f97SHong Zhang       k++; idx_i++;
264*632d0f97SHong Zhang     }
265*632d0f97SHong Zhang     /* printf(" [%d] n[%d]=%d, k: %d, \n",rank,i,n[i],k); */
266*632d0f97SHong Zhang   }
267*632d0f97SHong Zhang   if (k != len) SETERRQ3(1,"[%d] Error on forming the outgoing messages: k %d != len %d",rank,k,len);
268*632d0f97SHong Zhang 
269*632d0f97SHong Zhang   /*  Now  post the sends */
270*632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
271*632d0f97SHong Zhang 
272*632d0f97SHong Zhang   k = 0;
273*632d0f97SHong Zhang   for (i=0; i<size; ++i) { /* send outdat to processor [i] */
274*632d0f97SHong Zhang     if (i != rank){
275*632d0f97SHong Zhang       ierr = MPI_Isend(outdat,len,MPI_INT,i,rank,comm,&s_waits1[k]);CHKERRQ(ierr);
276*632d0f97SHong Zhang       printf(" [%d] send %d msg to [%d] \n",rank,len,i);
277*632d0f97SHong Zhang       k++;
278*632d0f97SHong Zhang     }
279*632d0f97SHong Zhang   }
280*632d0f97SHong Zhang 
281*632d0f97SHong Zhang   /* 2. Do local work */
282*632d0f97SHong Zhang   /*------------------*/
283*632d0f97SHong Zhang 
284*632d0f97SHong Zhang   /* No longer need the original indices*/
285*632d0f97SHong Zhang   for (i=0; i<imax; ++i) {
286*632d0f97SHong Zhang     ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr);
287*632d0f97SHong Zhang   }
288*632d0f97SHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
289*632d0f97SHong Zhang 
290*632d0f97SHong Zhang   /* 3. Receive other's is[] and process. Then send back */
291*632d0f97SHong Zhang   /*----------------------------------------------------*/
292*632d0f97SHong Zhang   /* Send is done */
293*632d0f97SHong Zhang   nrqs = size-1;
294*632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
295*632d0f97SHong Zhang   ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);
296*632d0f97SHong Zhang   ierr = PetscFree(outdat);CHKERRQ(ierr);
297*632d0f97SHong Zhang 
298*632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&r_waits1);CHKERRQ(ierr);
299*632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
300*632d0f97SHong Zhang   k = 0;
301*632d0f97SHong Zhang   do {
302*632d0f97SHong Zhang     /* Receive messages */
303*632d0f97SHong Zhang     ierr = MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,comm,&flag,&r_status);
304*632d0f97SHong Zhang     if (flag){
305*632d0f97SHong Zhang       ierr = MPI_Get_count(&r_status,MPI_INT,&len);
306*632d0f97SHong Zhang       proc_id = r_status.MPI_SOURCE;
307*632d0f97SHong Zhang       ierr = ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr);
308*632d0f97SHong Zhang       ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_waits1[k]);
309*632d0f97SHong Zhang       printf(" [%d] recv %d msg from [%d] \n",rank,len,proc_id);
310*632d0f97SHong Zhang 
311*632d0f97SHong Zhang       /*  Process messages -- not done yet */
312*632d0f97SHong Zhang       len = indat[0];
313*632d0f97SHong Zhang       ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr);
314*632d0f97SHong Zhang       for (i=0; i<len; i++){outdat[i] = indat[i+1];}
315*632d0f97SHong Zhang 
316*632d0f97SHong Zhang       /* Send messages back */
317*632d0f97SHong Zhang       printf(" [%d] send %d msg back to [%d] \n",rank,len,proc_id);
318*632d0f97SHong Zhang       ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,rank,comm,&s_waits2[k]);CHKERRQ(ierr);
319*632d0f97SHong Zhang 
320*632d0f97SHong Zhang       k++;
321*632d0f97SHong Zhang       ierr = PetscFree(outdat);CHKERRQ(ierr);
322*632d0f97SHong Zhang       ierr = PetscFree(indat);CHKERRQ(ierr);
323*632d0f97SHong Zhang     }
324*632d0f97SHong Zhang   } while (k < nrqs);
325*632d0f97SHong Zhang 
326*632d0f97SHong Zhang   /* 4. Receive work done on other processors, then process */
327*632d0f97SHong Zhang   /*--------------------------------------------------------*/
328*632d0f97SHong Zhang   ierr = MPI_Waitall(nrqs,s_waits2,s_status);CHKERRQ(ierr);
329*632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
330*632d0f97SHong Zhang   k = 0;
331*632d0f97SHong Zhang   do {
332*632d0f97SHong Zhang     /* Receive messages */
333*632d0f97SHong Zhang     ierr = MPI_Iprobe(MPI_ANY_SOURCE,MPI_ANY_TAG,comm,&flag,&r_status);
334*632d0f97SHong Zhang     if (flag){
335*632d0f97SHong Zhang       ierr = MPI_Get_count(&r_status,MPI_INT,&len);
336*632d0f97SHong Zhang       proc_id = r_status.MPI_SOURCE;
337*632d0f97SHong Zhang       ierr = ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr);
338*632d0f97SHong Zhang       ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_waits2[k]);
339*632d0f97SHong Zhang       printf(" [%d] recv %d msg from [%d] \n",rank,len,proc_id);
340*632d0f97SHong Zhang 
341*632d0f97SHong Zhang       /*  Process messages -- not done yet */
342*632d0f97SHong Zhang 
343*632d0f97SHong Zhang 
344*632d0f97SHong Zhang       k++;
345*632d0f97SHong Zhang       ierr = PetscFree(indat);CHKERRQ(ierr);
346*632d0f97SHong Zhang     }
347*632d0f97SHong Zhang   } while (k < nrqs);
348*632d0f97SHong Zhang 
349*632d0f97SHong Zhang #ifdef OLD
350*632d0f97SHong Zhang   for (i=0; i<imax; ++i) {
351*632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],is+i);CHKERRQ(ierr);
352*632d0f97SHong Zhang   }
353*632d0f97SHong Zhang 
354*632d0f97SHong Zhang 
355*632d0f97SHong Zhang   ierr = PetscFree(onodes2);CHKERRQ(ierr);
356*632d0f97SHong Zhang   ierr = PetscFree(olengths2);CHKERRQ(ierr);
357*632d0f97SHong Zhang 
358*632d0f97SHong Zhang   ierr = PetscFree(pa);CHKERRQ(ierr);
359*632d0f97SHong Zhang   ierr = PetscFree(rbuf2);CHKERRQ(ierr);
360*632d0f97SHong Zhang   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
361*632d0f97SHong Zhang   ierr = PetscFree(r_waits1);CHKERRQ(ierr);
362*632d0f97SHong Zhang   ierr = PetscFree(s_waits2);CHKERRQ(ierr);
363*632d0f97SHong Zhang   ierr = PetscFree(r_waits2);CHKERRQ(ierr);
364*632d0f97SHong Zhang   ierr = PetscFree(table);CHKERRQ(ierr);
365*632d0f97SHong Zhang   ierr = PetscFree(s_status);CHKERRQ(ierr);
366*632d0f97SHong Zhang   ierr = PetscFree(recv_status);CHKERRQ(ierr);
367*632d0f97SHong Zhang   ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
368*632d0f97SHong Zhang   ierr = PetscFree(xdata);CHKERRQ(ierr);
369*632d0f97SHong Zhang   ierr = PetscFree(isz1);CHKERRQ(ierr);
370*632d0f97SHong Zhang #endif /* OLD */
371*632d0f97SHong Zhang   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
372*632d0f97SHong Zhang   ierr = PetscFree(r_waits1);CHKERRQ(ierr);
373*632d0f97SHong Zhang   ierr = PetscFree(s_waits2);CHKERRQ(ierr);
374*632d0f97SHong Zhang   ierr = PetscFree(r_waits2);CHKERRQ(ierr);
375*632d0f97SHong Zhang   ierr = PetscFree(s_status);CHKERRQ(ierr);
376*632d0f97SHong Zhang   PetscFunctionReturn(0);
377*632d0f97SHong Zhang }
378*632d0f97SHong Zhang 
379*632d0f97SHong Zhang #undef __FUNCT__
380*632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local"
381*632d0f97SHong Zhang /*
382*632d0f97SHong Zhang    MatIncreaseOverlap_MPISBAIJ_Local - Called by MatincreaseOverlap, to do
383*632d0f97SHong Zhang        the work on the local processor.
384*632d0f97SHong Zhang 
385*632d0f97SHong Zhang      Inputs:
386*632d0f97SHong Zhang       C      - MAT_MPISBAIJ;
387*632d0f97SHong Zhang       imax - total no of index sets processed at a time;
388*632d0f97SHong Zhang       table  - an array of char - size = Mbs bits.
389*632d0f97SHong Zhang 
390*632d0f97SHong Zhang      Output:
391*632d0f97SHong Zhang       isz    - array containing the count of the solution elements correspondign
392*632d0f97SHong Zhang                to each index set;
393*632d0f97SHong Zhang       data   - pointer to the solutions
394*632d0f97SHong Zhang */
395*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Local(Mat C,int imax,PetscBT *table,int *isz,int **data)
396*632d0f97SHong Zhang {
397*632d0f97SHong Zhang   Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data;
398*632d0f97SHong Zhang   Mat         A = c->A,B = c->B;
399*632d0f97SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
400*632d0f97SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)B->data;
401*632d0f97SHong Zhang   int         start,end,val,max,rstart,cstart,*ai,*aj;
402*632d0f97SHong Zhang   int         *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
403*632d0f97SHong Zhang   PetscBT     table_i;
404*632d0f97SHong Zhang 
405*632d0f97SHong Zhang   PetscFunctionBegin;
406*632d0f97SHong Zhang   rstart = c->rstart;
407*632d0f97SHong Zhang   cstart = c->cstart;
408*632d0f97SHong Zhang   ai     = a->i;
409*632d0f97SHong Zhang   aj     = a->j;
410*632d0f97SHong Zhang   bi     = b->i;
411*632d0f97SHong Zhang   bj     = b->j;
412*632d0f97SHong Zhang   garray = c->garray;
413*632d0f97SHong Zhang 
414*632d0f97SHong Zhang 
415*632d0f97SHong Zhang   for (i=0; i<imax; i++) {
416*632d0f97SHong Zhang     data_i  = data[i];
417*632d0f97SHong Zhang     table_i = table[i];
418*632d0f97SHong Zhang     isz_i   = isz[i];
419*632d0f97SHong Zhang     for (j=0,max=isz[i]; j<max; j++) {
420*632d0f97SHong Zhang       row   = data_i[j] - rstart;
421*632d0f97SHong Zhang       start = ai[row];
422*632d0f97SHong Zhang       end   = ai[row+1];
423*632d0f97SHong Zhang       for (k=start; k<end; k++) { /* Amat */
424*632d0f97SHong Zhang         val = aj[k] + cstart;
425*632d0f97SHong Zhang         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
426*632d0f97SHong Zhang       }
427*632d0f97SHong Zhang       start = bi[row];
428*632d0f97SHong Zhang       end   = bi[row+1];
429*632d0f97SHong Zhang       for (k=start; k<end; k++) { /* Bmat */
430*632d0f97SHong Zhang         val = garray[bj[k]];
431*632d0f97SHong Zhang         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
432*632d0f97SHong Zhang       }
433*632d0f97SHong Zhang     }
434*632d0f97SHong Zhang     isz[i] = isz_i;
435*632d0f97SHong Zhang   }
436*632d0f97SHong Zhang   PetscFunctionReturn(0);
437*632d0f97SHong Zhang }
438*632d0f97SHong Zhang #undef __FUNCT__
439*632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Receive"
440*632d0f97SHong Zhang /*
441*632d0f97SHong Zhang       MatIncreaseOverlap_MPISBAIJ_Receive - Process the recieved messages,
442*632d0f97SHong Zhang          and return the output
443*632d0f97SHong Zhang 
444*632d0f97SHong Zhang          Input:
445*632d0f97SHong Zhang            C    - the matrix
446*632d0f97SHong Zhang            nrqr - no of messages being processed.
447*632d0f97SHong Zhang            rbuf - an array of pointers to the recieved requests
448*632d0f97SHong Zhang 
449*632d0f97SHong Zhang          Output:
450*632d0f97SHong Zhang            xdata - array of messages to be sent back
451*632d0f97SHong Zhang            isz1  - size of each message
452*632d0f97SHong Zhang 
453*632d0f97SHong Zhang   For better efficiency perhaps we should malloc seperately each xdata[i],
454*632d0f97SHong Zhang then if a remalloc is required we need only copy the data for that one row
455*632d0f97SHong Zhang rather then all previous rows as it is now where a single large chunck of
456*632d0f97SHong Zhang memory is used.
457*632d0f97SHong Zhang 
458*632d0f97SHong Zhang */
459*632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat C,int nrqr,int **rbuf,int **xdata,int * isz1)
460*632d0f97SHong Zhang {
461*632d0f97SHong Zhang   Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data;
462*632d0f97SHong Zhang   Mat         A = c->A,B = c->B;
463*632d0f97SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
464*632d0f97SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)B->data;
465*632d0f97SHong Zhang   int         rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
466*632d0f97SHong Zhang   int         row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
467*632d0f97SHong Zhang   int         val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr;
468*632d0f97SHong Zhang   int         *rbuf_i,kmax,rbuf_0,ierr;
469*632d0f97SHong Zhang   PetscBT     xtable;
470*632d0f97SHong Zhang 
471*632d0f97SHong Zhang   PetscFunctionBegin;
472*632d0f97SHong Zhang   rank   = c->rank;
473*632d0f97SHong Zhang   Mbs    = c->Mbs;
474*632d0f97SHong Zhang   rstart = c->rstart;
475*632d0f97SHong Zhang   cstart = c->cstart;
476*632d0f97SHong Zhang   ai     = a->i;
477*632d0f97SHong Zhang   aj     = a->j;
478*632d0f97SHong Zhang   bi     = b->i;
479*632d0f97SHong Zhang   bj     = b->j;
480*632d0f97SHong Zhang   garray = c->garray;
481*632d0f97SHong Zhang 
482*632d0f97SHong Zhang 
483*632d0f97SHong Zhang   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
484*632d0f97SHong Zhang     rbuf_i  =  rbuf[i];
485*632d0f97SHong Zhang     rbuf_0  =  rbuf_i[0];
486*632d0f97SHong Zhang     ct     += rbuf_0;
487*632d0f97SHong Zhang     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
488*632d0f97SHong Zhang   }
489*632d0f97SHong Zhang 
490*632d0f97SHong Zhang   if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs;
491*632d0f97SHong Zhang   else        max1 = 1;
492*632d0f97SHong Zhang   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
493*632d0f97SHong Zhang   ierr         = PetscMalloc(mem_estimate*sizeof(int),&xdata[0]);CHKERRQ(ierr);
494*632d0f97SHong Zhang   ++no_malloc;
495*632d0f97SHong Zhang   ierr         = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr);
496*632d0f97SHong Zhang   ierr         = PetscMemzero(isz1,nrqr*sizeof(int));CHKERRQ(ierr);
497*632d0f97SHong Zhang 
498*632d0f97SHong Zhang   ct3 = 0;
499*632d0f97SHong Zhang   for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */
500*632d0f97SHong Zhang     rbuf_i =  rbuf[i];
501*632d0f97SHong Zhang     rbuf_0 =  rbuf_i[0];
502*632d0f97SHong Zhang     ct1    =  2*rbuf_0+1;
503*632d0f97SHong Zhang     ct2    =  ct1;
504*632d0f97SHong Zhang     ct3    += ct1;
505*632d0f97SHong Zhang     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
506*632d0f97SHong Zhang       ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr);
507*632d0f97SHong Zhang       oct2 = ct2;
508*632d0f97SHong Zhang       kmax = rbuf_i[2*j];
509*632d0f97SHong Zhang       for (k=0; k<kmax; k++,ct1++) {
510*632d0f97SHong Zhang         row = rbuf_i[ct1];
511*632d0f97SHong Zhang         if (!PetscBTLookupSet(xtable,row)) {
512*632d0f97SHong Zhang           if (!(ct3 < mem_estimate)) {
513*632d0f97SHong Zhang             new_estimate = (int)(1.5*mem_estimate)+1;
514*632d0f97SHong Zhang             ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
515*632d0f97SHong Zhang             ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
516*632d0f97SHong Zhang             ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
517*632d0f97SHong Zhang             xdata[0]     = tmp;
518*632d0f97SHong Zhang             mem_estimate = new_estimate; ++no_malloc;
519*632d0f97SHong Zhang             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
520*632d0f97SHong Zhang           }
521*632d0f97SHong Zhang           xdata[i][ct2++] = row;
522*632d0f97SHong Zhang           ct3++;
523*632d0f97SHong Zhang         }
524*632d0f97SHong Zhang       }
525*632d0f97SHong Zhang       for (k=oct2,max2=ct2; k<max2; k++)  {
526*632d0f97SHong Zhang         row   = xdata[i][k] - rstart;
527*632d0f97SHong Zhang         start = ai[row];
528*632d0f97SHong Zhang         end   = ai[row+1];
529*632d0f97SHong Zhang         for (l=start; l<end; l++) {
530*632d0f97SHong Zhang           val = aj[l] + cstart;
531*632d0f97SHong Zhang           if (!PetscBTLookupSet(xtable,val)) {
532*632d0f97SHong Zhang             if (!(ct3 < mem_estimate)) {
533*632d0f97SHong Zhang               new_estimate = (int)(1.5*mem_estimate)+1;
534*632d0f97SHong Zhang               ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
535*632d0f97SHong Zhang               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
536*632d0f97SHong Zhang               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
537*632d0f97SHong Zhang               xdata[0]     = tmp;
538*632d0f97SHong Zhang               mem_estimate = new_estimate; ++no_malloc;
539*632d0f97SHong Zhang               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
540*632d0f97SHong Zhang             }
541*632d0f97SHong Zhang             xdata[i][ct2++] = val;
542*632d0f97SHong Zhang             ct3++;
543*632d0f97SHong Zhang           }
544*632d0f97SHong Zhang         }
545*632d0f97SHong Zhang         start = bi[row];
546*632d0f97SHong Zhang         end   = bi[row+1];
547*632d0f97SHong Zhang         for (l=start; l<end; l++) {
548*632d0f97SHong Zhang           val = garray[bj[l]];
549*632d0f97SHong Zhang           if (!PetscBTLookupSet(xtable,val)) {
550*632d0f97SHong Zhang             if (!(ct3 < mem_estimate)) {
551*632d0f97SHong Zhang               new_estimate = (int)(1.5*mem_estimate)+1;
552*632d0f97SHong Zhang               ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
553*632d0f97SHong Zhang               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
554*632d0f97SHong Zhang               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
555*632d0f97SHong Zhang               xdata[0]     = tmp;
556*632d0f97SHong Zhang               mem_estimate = new_estimate; ++no_malloc;
557*632d0f97SHong Zhang               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
558*632d0f97SHong Zhang             }
559*632d0f97SHong Zhang             xdata[i][ct2++] = val;
560*632d0f97SHong Zhang             ct3++;
561*632d0f97SHong Zhang           }
562*632d0f97SHong Zhang         }
563*632d0f97SHong Zhang       }
564*632d0f97SHong Zhang       /* Update the header*/
565*632d0f97SHong Zhang       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
566*632d0f97SHong Zhang       xdata[i][2*j-1] = rbuf_i[2*j-1];
567*632d0f97SHong Zhang     }
568*632d0f97SHong Zhang     xdata[i][0] = rbuf_0;
569*632d0f97SHong Zhang     xdata[i+1]  = xdata[i] + ct2;
570*632d0f97SHong Zhang     isz1[i]     = ct2; /* size of each message */
571*632d0f97SHong Zhang   }
572*632d0f97SHong Zhang   ierr = PetscBTDestroy(xtable);CHKERRQ(ierr);
573*632d0f97SHong Zhang   PetscLogInfo(0,"MatIncreaseOverlap_MPISBAIJ:[%d] Allocated %d bytes, required %d, no of mallocs = %d\n",rank,mem_estimate,ct3,no_malloc);
574*632d0f97SHong Zhang   PetscFunctionReturn(0);
575*632d0f97SHong Zhang }
576*632d0f97SHong Zhang 
577*632d0f97SHong Zhang 
578