xref: /petsc/src/mat/impls/sbaij/mpi/sbaijov.c (revision c910923d57409294824c8bf37fdd74922bbb2b1c)
1632d0f97SHong Zhang /*$Id: sbaijov.c,v 1.65 2001/08/06 21:15:42 bsmith Exp $*/
2632d0f97SHong Zhang 
3632d0f97SHong Zhang /*
4632d0f97SHong Zhang    Routines to compute overlapping regions of a parallel MPI matrix.
5632d0f97SHong Zhang    Used for finding submatrices that were shared across processors.
6632d0f97SHong Zhang */
7632d0f97SHong Zhang #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
8632d0f97SHong Zhang #include "petscbt.h"
9632d0f97SHong Zhang 
10632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Once(Mat,int,IS *);
11632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Local(Mat,int,char **,int*,int**);
12632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat,int,int **,int**,int*);
13632d0f97SHong Zhang 
14632d0f97SHong Zhang /* this function is sasme as MatCompressIndicesGeneral_MPIBAIJ -- should be removed! */
15632d0f97SHong Zhang #undef __FUNCT__
16632d0f97SHong Zhang #define __FUNCT__ "MatCompressIndicesGeneral_MPISBAIJ"
17632d0f97SHong Zhang static int MatCompressIndicesGeneral_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
18632d0f97SHong Zhang {
19632d0f97SHong Zhang   Mat_MPISBAIJ        *baij = (Mat_MPISBAIJ*)C->data;
20632d0f97SHong Zhang   int                ierr,isz,bs = baij->bs,n,i,j,*idx,ival;
21632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
22632d0f97SHong Zhang   PetscTable         gid1_lid1;
23632d0f97SHong Zhang   int                tt, gid1, *nidx;
24632d0f97SHong Zhang   PetscTablePosition tpos;
25632d0f97SHong Zhang #else
26632d0f97SHong Zhang   int                Nbs,*nidx;
27632d0f97SHong Zhang   PetscBT            table;
28632d0f97SHong Zhang #endif
29632d0f97SHong Zhang 
30632d0f97SHong Zhang   PetscFunctionBegin;
31632d0f97SHong Zhang   /* printf(" ...MatCompressIndicesGeneral_MPISBAIJ is called ...\n"); */
32632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
33632d0f97SHong Zhang   ierr = PetscTableCreate(baij->mbs,&gid1_lid1);CHKERRQ(ierr);
34632d0f97SHong Zhang #else
35632d0f97SHong Zhang   Nbs  = baij->Nbs;
36632d0f97SHong Zhang   ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
37632d0f97SHong Zhang   ierr = PetscBTCreate(Nbs,table);CHKERRQ(ierr);
38632d0f97SHong Zhang #endif
39632d0f97SHong Zhang   for (i=0; i<imax; i++) {
40632d0f97SHong Zhang     isz  = 0;
41632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
42632d0f97SHong Zhang     ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
43632d0f97SHong Zhang #else
44632d0f97SHong Zhang     ierr = PetscBTMemzero(Nbs,table);CHKERRQ(ierr);
45632d0f97SHong Zhang #endif
46632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
47632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
48632d0f97SHong Zhang     for (j=0; j<n ; j++) {
49632d0f97SHong Zhang       ival = idx[j]/bs; /* convert the indices into block indices */
50632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
51632d0f97SHong Zhang       ierr = PetscTableFind(gid1_lid1,ival+1,&tt);CHKERRQ(ierr);
52632d0f97SHong Zhang       if (!tt) {
53632d0f97SHong Zhang 	ierr = PetscTableAdd(gid1_lid1,ival+1,isz+1);CHKERRQ(ierr);
54632d0f97SHong Zhang         isz++;
55632d0f97SHong Zhang       }
56632d0f97SHong Zhang #else
57632d0f97SHong Zhang       if (ival>Nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than mat-dim");
58632d0f97SHong Zhang       if(!PetscBTLookupSet(table,ival)) { nidx[isz++] = ival;}
59632d0f97SHong Zhang #endif
60632d0f97SHong Zhang     }
61632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
62632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
63632d0f97SHong Zhang     ierr = PetscMalloc((isz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
64632d0f97SHong Zhang     ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
65632d0f97SHong Zhang     j = 0;
66632d0f97SHong Zhang     while (tpos) {
67632d0f97SHong Zhang       ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid1,&tt);CHKERRQ(ierr);
68632d0f97SHong Zhang       if (tt-- > isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"index greater than array-dim"); }
69632d0f97SHong Zhang       nidx[tt] = gid1 - 1;
70632d0f97SHong Zhang       j++;
71632d0f97SHong Zhang     }
72632d0f97SHong Zhang     if (j != isz) { SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"table error: jj != isz"); }
73632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
74632d0f97SHong Zhang     ierr = PetscFree(nidx);CHKERRQ(ierr);
75632d0f97SHong Zhang #else
76632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz,nidx,(is_out+i));CHKERRQ(ierr);
77632d0f97SHong Zhang #endif
78632d0f97SHong Zhang   }
79632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
80632d0f97SHong Zhang   ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
81632d0f97SHong Zhang #else
82632d0f97SHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
83632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
84632d0f97SHong Zhang #endif
85632d0f97SHong Zhang   PetscFunctionReturn(0);
86632d0f97SHong Zhang }
87632d0f97SHong Zhang 
88632d0f97SHong Zhang #undef __FUNCT__
89632d0f97SHong Zhang #define __FUNCT__ "MatCompressIndicesSorted_MPISBAIJ"
90632d0f97SHong Zhang static int MatCompressIndicesSorted_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
91632d0f97SHong Zhang {
92632d0f97SHong Zhang   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)C->data;
93632d0f97SHong Zhang   int          ierr,bs=baij->bs,i,j,k,val,n,*idx,*nidx,*idx_local;
94632d0f97SHong Zhang   PetscTruth   flg;
95632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
96632d0f97SHong Zhang   int maxsz;
97632d0f97SHong Zhang #else
98632d0f97SHong Zhang   int Nbs=baij->Nbs;
99632d0f97SHong Zhang #endif
100632d0f97SHong Zhang   PetscFunctionBegin;
101632d0f97SHong Zhang   printf(" ... MatCompressIndicesSorted_MPISBAIJ is called ...\n");
102632d0f97SHong Zhang   for (i=0; i<imax; i++) {
103632d0f97SHong Zhang     ierr = ISSorted(is_in[i],&flg);CHKERRQ(ierr);
104632d0f97SHong Zhang     if (!flg) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Indices are not sorted");
105632d0f97SHong Zhang   }
106632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
107632d0f97SHong Zhang   /* Now check max size */
108632d0f97SHong Zhang   for (i=0,maxsz=0; i<imax; i++) {
109632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
110632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
111632d0f97SHong Zhang     if (n%bs !=0) SETERRQ(1,"Indices are not block ordered");
112632d0f97SHong Zhang     n = n/bs; /* The reduced index size */
113632d0f97SHong Zhang     if (n > maxsz) maxsz = n;
114632d0f97SHong Zhang   }
115632d0f97SHong Zhang   ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
116632d0f97SHong Zhang #else
117632d0f97SHong Zhang   ierr = PetscMalloc((Nbs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
118632d0f97SHong Zhang #endif
119632d0f97SHong Zhang   /* Now check if the indices are in block order */
120632d0f97SHong Zhang   for (i=0; i<imax; i++) {
121632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
122632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
123632d0f97SHong Zhang     if (n%bs !=0) SETERRQ(1,"Indices are not block ordered");
124632d0f97SHong Zhang 
125632d0f97SHong Zhang     n = n/bs; /* The reduced index size */
126632d0f97SHong Zhang     idx_local = idx;
127632d0f97SHong Zhang     for (j=0; j<n ; j++) {
128632d0f97SHong Zhang       val = idx_local[0];
129632d0f97SHong Zhang       if (val%bs != 0) SETERRQ(1,"Indices are not block ordered");
130632d0f97SHong Zhang       for (k=0; k<bs; k++) {
131632d0f97SHong Zhang         if (val+k != idx_local[k]) SETERRQ(1,"Indices are not block ordered");
132632d0f97SHong Zhang       }
133632d0f97SHong Zhang       nidx[j] = val/bs;
134632d0f97SHong Zhang       idx_local +=bs;
135632d0f97SHong Zhang     }
136632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
137632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,n,nidx,(is_out+i));CHKERRQ(ierr);
138632d0f97SHong Zhang   }
139632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
140632d0f97SHong Zhang 
141632d0f97SHong Zhang   PetscFunctionReturn(0);
142632d0f97SHong Zhang }
143632d0f97SHong Zhang 
144632d0f97SHong Zhang #undef __FUNCT__
145632d0f97SHong Zhang #define __FUNCT__ "MatExpandIndices_MPISBAIJ"
146632d0f97SHong Zhang static int MatExpandIndices_MPISBAIJ(Mat C,int imax,const IS is_in[],IS is_out[])
147632d0f97SHong Zhang {
148632d0f97SHong Zhang   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)C->data;
149632d0f97SHong Zhang   int          ierr,bs = baij->bs,n,i,j,k,*idx,*nidx;
150632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
151632d0f97SHong Zhang   int          maxsz;
152632d0f97SHong Zhang #else
153632d0f97SHong Zhang   int          Nbs = baij->Nbs;
154632d0f97SHong Zhang #endif
155632d0f97SHong Zhang 
156632d0f97SHong Zhang   PetscFunctionBegin;
157632d0f97SHong Zhang   /* printf(" ... MatExpandIndices_MPISBAIJ is called ...\n"); */
158632d0f97SHong Zhang #if defined (PETSC_USE_CTABLE)
159632d0f97SHong Zhang   /* Now check max size */
160632d0f97SHong Zhang   for (i=0,maxsz=0; i<imax; i++) {
161632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
162632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
163632d0f97SHong Zhang     if (n*bs > maxsz) maxsz = n*bs;
164632d0f97SHong Zhang   }
165632d0f97SHong Zhang   ierr = PetscMalloc((maxsz+1)*sizeof(int),&nidx);CHKERRQ(ierr);
166632d0f97SHong Zhang #else
167632d0f97SHong Zhang   ierr = PetscMalloc((Nbs*bs+1)*sizeof(int),&nidx);CHKERRQ(ierr);
168632d0f97SHong Zhang #endif
169632d0f97SHong Zhang 
170632d0f97SHong Zhang   for (i=0; i<imax; i++) {
171632d0f97SHong Zhang     ierr = ISGetIndices(is_in[i],&idx);CHKERRQ(ierr);
172632d0f97SHong Zhang     ierr = ISGetLocalSize(is_in[i],&n);CHKERRQ(ierr);
173632d0f97SHong Zhang     for (j=0; j<n ; ++j){
174632d0f97SHong Zhang       for (k=0; k<bs; k++)
175632d0f97SHong Zhang         nidx[j*bs+k] = idx[j]*bs+k;
176632d0f97SHong Zhang     }
177632d0f97SHong Zhang     ierr = ISRestoreIndices(is_in[i],&idx);CHKERRQ(ierr);
178632d0f97SHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,n*bs,nidx,is_out+i);CHKERRQ(ierr);
179632d0f97SHong Zhang   }
180632d0f97SHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
181632d0f97SHong Zhang   PetscFunctionReturn(0);
182632d0f97SHong Zhang }
183632d0f97SHong Zhang 
184632d0f97SHong Zhang 
185632d0f97SHong Zhang #undef __FUNCT__
186632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ"
187*c910923dSHong Zhang int MatIncreaseOverlap_MPISBAIJ(Mat C,int is_max,IS is[],int ov)
188632d0f97SHong Zhang {
189632d0f97SHong Zhang   int i,ierr;
190632d0f97SHong Zhang   IS  *is_new;
191632d0f97SHong Zhang 
192632d0f97SHong Zhang   PetscFunctionBegin;
193*c910923dSHong Zhang   ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr);
194632d0f97SHong Zhang   /* Convert the indices into block format */
195*c910923dSHong Zhang   ierr = MatCompressIndicesGeneral_MPISBAIJ(C,is_max,is,is_new);CHKERRQ(ierr);
196632d0f97SHong Zhang   if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}
197632d0f97SHong Zhang   for (i=0; i<ov; ++i) {
198*c910923dSHong Zhang     ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);
199632d0f97SHong Zhang   }
200*c910923dSHong Zhang   for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);}
201*c910923dSHong Zhang   ierr = MatExpandIndices_MPISBAIJ(C,is_max,is_new,is);CHKERRQ(ierr);
202*c910923dSHong Zhang   for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);}
203632d0f97SHong Zhang   ierr = PetscFree(is_new);CHKERRQ(ierr);
204632d0f97SHong Zhang   PetscFunctionReturn(0);
205632d0f97SHong Zhang }
206632d0f97SHong Zhang 
207632d0f97SHong Zhang #undef __FUNCT__
208632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once"
209*c910923dSHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Once(Mat C,int is_max,IS is[])
210632d0f97SHong Zhang {
211632d0f97SHong Zhang   Mat_MPISBAIJ  *c = (Mat_MPISBAIJ*)C->data;
212632d0f97SHong Zhang   int         **idx,*n,len,*idx_i;
213*c910923dSHong Zhang   int         size,rank,Mbs,i,j,k,ierr,**rbuf,nrqs,msz,*outdat,*indat;
214632d0f97SHong Zhang   int         *onodes1,*olengths1,tag1,tag2,*onodes2,*olengths2,flag,proc_id;
215632d0f97SHong Zhang   MPI_Comm    comm;
216*c910923dSHong Zhang   MPI_Request *s_waits1,*s_waits2,r_req;
217632d0f97SHong Zhang   MPI_Status  *s_status,r_status;
218632d0f97SHong Zhang 
219632d0f97SHong Zhang   PetscFunctionBegin;
220632d0f97SHong Zhang 
221632d0f97SHong Zhang   comm = C->comm;
222632d0f97SHong Zhang   size = c->size;
223632d0f97SHong Zhang   rank = c->rank;
224632d0f97SHong Zhang   Mbs  = c->Mbs;
225632d0f97SHong Zhang 
226632d0f97SHong Zhang   /* 1. Send is[] to all other processors */
227632d0f97SHong Zhang   /*--------------------------------------*/
228632d0f97SHong Zhang   /* This processor sends its is[] to all other processors in the format:
229632d0f97SHong Zhang        outdat[0]          = is_max, no of is in this processor
230632d0f97SHong Zhang        outdat[1]          = n[0], size of is[0]
231632d0f97SHong Zhang         ...
232632d0f97SHong Zhang        outdat[is_max]     = n[is_max-1], size of is[is_max-1]
233632d0f97SHong Zhang        outdat[is_max + 1] = data(is[0])
234632d0f97SHong Zhang         ...
235632d0f97SHong Zhang        outdat[is_max + i] = data(is[i])
236632d0f97SHong Zhang         ...
237632d0f97SHong Zhang   */
238*c910923dSHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
239*c910923dSHong Zhang   ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
240*c910923dSHong Zhang   printf(" [%d] tags: %d, %d\n",rank,tag1,tag2);
241*c910923dSHong Zhang 
242*c910923dSHong Zhang   len  = (is_max+1)*sizeof(int*)+ (is_max)*sizeof(int);
243632d0f97SHong Zhang   ierr = PetscMalloc(len,&idx);CHKERRQ(ierr);
244*c910923dSHong Zhang   n    = (int*)(idx + is_max);
245632d0f97SHong Zhang 
246632d0f97SHong Zhang   /* Allocate Memory for outgoing messages */
247*c910923dSHong Zhang   len = 1 + is_max;
248*c910923dSHong Zhang   for (i=0; i<is_max; i++) {
249632d0f97SHong Zhang     ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
250632d0f97SHong Zhang     ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
251632d0f97SHong Zhang     len += n[i];
252632d0f97SHong Zhang   }
253632d0f97SHong Zhang   ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr);
254632d0f97SHong Zhang 
255632d0f97SHong Zhang   /* Form the outgoing messages */
256*c910923dSHong Zhang   outdat[0] = is_max;
257*c910923dSHong Zhang   for (i=0; i<is_max; i++) {
258632d0f97SHong Zhang     outdat[i+1] = n[i];
259632d0f97SHong Zhang   }
260*c910923dSHong Zhang   k = is_max + 1;
261*c910923dSHong Zhang   for (i=0; i<is_max; i++) { /* for is[i] */
262632d0f97SHong Zhang     idx_i = idx[i];
263632d0f97SHong Zhang     for (j=0; j<n[i]; j++){
264632d0f97SHong Zhang       outdat[k] = *(idx_i);
265632d0f97SHong Zhang       /* if (!rank) printf(" outdat[%d] = %d\n",k,outdat[k] ); */
266632d0f97SHong Zhang       k++; idx_i++;
267632d0f97SHong Zhang     }
268632d0f97SHong Zhang     /* printf(" [%d] n[%d]=%d, k: %d, \n",rank,i,n[i],k); */
269632d0f97SHong Zhang   }
270632d0f97SHong Zhang   if (k != len) SETERRQ3(1,"[%d] Error on forming the outgoing messages: k %d != len %d",rank,k,len);
271632d0f97SHong Zhang 
272632d0f97SHong Zhang   /*  Now  post the sends */
273632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
274632d0f97SHong Zhang 
275632d0f97SHong Zhang   k = 0;
276*c910923dSHong Zhang   for (proc_id=0; proc_id<size; ++proc_id) { /* send outdat to processor [proc_id] */
277*c910923dSHong Zhang     if (proc_id != rank){
278*c910923dSHong Zhang       ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr);
279*c910923dSHong Zhang       printf(" [%d] send %d msg to [%d] \n",rank,len,proc_id);
280632d0f97SHong Zhang       k++;
281632d0f97SHong Zhang     }
282632d0f97SHong Zhang   }
283632d0f97SHong Zhang 
284632d0f97SHong Zhang   /* 2. Do local work */
285632d0f97SHong Zhang   /*------------------*/
286*c910923dSHong Zhang   Mat          A = c->A, B = c->B;
287*c910923dSHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
288*c910923dSHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)B->data;
289*c910923dSHong Zhang   int          row,mbs, *nidx,*nidx_i,col,isz,isz0,*ai,*aj,bs,*bi,*bj,*garray,rstart,l;
290*c910923dSHong Zhang   int          a_start,a_end,b_start,b_end;
291*c910923dSHong Zhang   PetscBT      table;
292*c910923dSHong Zhang   PetscBT      table0;
293632d0f97SHong Zhang 
294*c910923dSHong Zhang   mbs    = a->mbs;
295*c910923dSHong Zhang   bs     = a->bs;
296*c910923dSHong Zhang   ai     = a->i;
297*c910923dSHong Zhang   aj     = a->j;
298*c910923dSHong Zhang   bi     = b->i;
299*c910923dSHong Zhang   bj     = b->j;
300*c910923dSHong Zhang   garray = c->garray;
301*c910923dSHong Zhang   rstart = c->rstart;
302*c910923dSHong Zhang 
303*c910923dSHong Zhang   ierr = PetscBTCreate(Mbs,table);CHKERRQ(ierr);
304*c910923dSHong Zhang   ierr = PetscMalloc(is_max*Mbs*sizeof(int),&nidx);CHKERRQ(ierr);
305*c910923dSHong Zhang   ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr);
306*c910923dSHong Zhang 
307*c910923dSHong Zhang   for (i=0; i<is_max; i++) { /* for each is */
308*c910923dSHong Zhang     isz  = 0;
309*c910923dSHong Zhang     ierr = PetscBTMemzero(Mbs,table);CHKERRQ(ierr);
310*c910923dSHong Zhang     idx_i = idx[i];
311*c910923dSHong Zhang     nidx_i = nidx+i*Mbs;  /*  holds new is[i] array */
312*c910923dSHong Zhang 
313*c910923dSHong Zhang     /* Enter these into the temp arrays i.e mark table[row], enter row into new index */
314*c910923dSHong Zhang     for (j=0; j<n[i]; j++){
315*c910923dSHong Zhang       col = idx_i[j];
316*c910923dSHong Zhang       if (col >= Mbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"[%d] index col %d >= Mbs %d",rank,col,Mbs);
317*c910923dSHong Zhang       if(!PetscBTLookupSet(table,col)) { nidx_i[isz++] = col;}
318632d0f97SHong Zhang     }
319*c910923dSHong Zhang 
320*c910923dSHong Zhang     k = 0;
321*c910923dSHong Zhang     /* set table0 for lookup */
322*c910923dSHong Zhang     ierr = PetscBTMemzero(mbs,table0);CHKERRQ(ierr);
323*c910923dSHong Zhang     for (l=k; l<isz; l++) PetscBTSet(table0,nidx_i[l]);
324*c910923dSHong Zhang 
325*c910923dSHong Zhang     isz0 = isz; /* length of nidx_i[] before updating */
326*c910923dSHong Zhang     for (row=0; row<mbs; row++){
327*c910923dSHong Zhang       a_start = ai[row]; a_end = ai[row+1];
328*c910923dSHong Zhang       b_start = bi[row]; b_end = bi[row+1];
329*c910923dSHong Zhang       if (PetscBTLookup(table0,row+rstart)){ /* row is on nidx_i - row search: collect all col in this row */
330*c910923dSHong Zhang         /* printf(" [%d] is[%d] row %d is on nidx_i\n",rank,i,row+rstart); */
331*c910923dSHong Zhang         for (l = a_start; l<a_end ; l++){ /* Amat */
332*c910923dSHong Zhang           col = aj[l] + rstart;
333*c910923dSHong Zhang           if (!PetscBTLookupSet(table,col)) {nidx_i[isz++] = col;}
334*c910923dSHong Zhang         }
335*c910923dSHong Zhang         for (l = b_start; l<b_end ; l++){ /* Bmat */
336*c910923dSHong Zhang           col = garray[bj[l]];
337*c910923dSHong Zhang           if (!PetscBTLookupSet(table,col)) {nidx_i[isz++] = col;}
338*c910923dSHong Zhang         }
339*c910923dSHong Zhang         k++;
340*c910923dSHong Zhang         if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
341*c910923dSHong Zhang       } else { /* row is not on nidx_i - col serach: add row onto nidx_i if there is a col in nidx_i */
342*c910923dSHong Zhang         for (l = a_start; l<a_end ; l++){ /* Amat */
343*c910923dSHong Zhang           col = aj[l] + rstart;
344*c910923dSHong Zhang           if (PetscBTLookup(table0,col)){
345*c910923dSHong Zhang             if (!PetscBTLookupSet(table,row+rstart)) {nidx_i[isz++] = row+rstart;}
346*c910923dSHong Zhang             break; /* for l = start; l<end ; l++) */
347*c910923dSHong Zhang           }
348*c910923dSHong Zhang         }
349*c910923dSHong Zhang         for (l = b_start; l<b_end ; l++){ /* Bmat */
350*c910923dSHong Zhang           col = garray[bj[l]];
351*c910923dSHong Zhang           if (PetscBTLookup(table0,col)){
352*c910923dSHong Zhang             if (!PetscBTLookupSet(table,row+rstart)) {nidx_i[isz++] = row+rstart;}
353*c910923dSHong Zhang             break; /* for l = start; l<end ; l++) */
354*c910923dSHong Zhang           }
355*c910923dSHong Zhang         }
356*c910923dSHong Zhang       }
357*c910923dSHong Zhang     } /* for (row=0; row<mbs; row++) */
358*c910923dSHong Zhang 
359*c910923dSHong Zhang     ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr);
360*c910923dSHong Zhang     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
361*c910923dSHong Zhang     n[i] = isz;
362*c910923dSHong Zhang   } /* /* for each is */
363*c910923dSHong Zhang   ierr = PetscBTDestroy(table);CHKERRQ(ierr);
364*c910923dSHong Zhang   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
365*c910923dSHong Zhang 
366632d0f97SHong Zhang 
367632d0f97SHong Zhang   /* 3. Receive other's is[] and process. Then send back */
368632d0f97SHong Zhang   /*----------------------------------------------------*/
369632d0f97SHong Zhang   /* Send is done */
370632d0f97SHong Zhang   nrqs = size-1;
371632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
372632d0f97SHong Zhang   ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);
373632d0f97SHong Zhang   ierr = PetscFree(outdat);CHKERRQ(ierr);
374632d0f97SHong Zhang   ierr = PetscMalloc(size*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
375632d0f97SHong Zhang   k = 0;
376632d0f97SHong Zhang   do {
377632d0f97SHong Zhang     /* Receive messages */
378*c910923dSHong Zhang     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);
379632d0f97SHong Zhang     if (flag){
380632d0f97SHong Zhang       ierr = MPI_Get_count(&r_status,MPI_INT,&len);
381632d0f97SHong Zhang       proc_id = r_status.MPI_SOURCE;
382*c910923dSHong Zhang       ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr);
383*c910923dSHong Zhang       ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req);
384632d0f97SHong Zhang       printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id);
385632d0f97SHong Zhang 
386632d0f97SHong Zhang       /*  Process messages -- not done yet */
387632d0f97SHong Zhang       len = indat[0];
388632d0f97SHong Zhang       ierr = PetscMalloc(len*sizeof(int),&outdat);CHKERRQ(ierr);
389632d0f97SHong Zhang       for (i=0; i<len; i++){outdat[i] = indat[i+1];}
390632d0f97SHong Zhang 
391632d0f97SHong Zhang       /* Send messages back */
392632d0f97SHong Zhang       printf(" [%d] send %d msg back to [%d] \n",rank,len,proc_id);
393*c910923dSHong Zhang       ierr = MPI_Isend(outdat,len,MPI_INT,proc_id,tag2,comm,&s_waits2[k]);CHKERRQ(ierr);
394632d0f97SHong Zhang 
395632d0f97SHong Zhang       k++;
396632d0f97SHong Zhang       ierr = PetscFree(outdat);CHKERRQ(ierr);
397632d0f97SHong Zhang       ierr = PetscFree(indat);CHKERRQ(ierr);
398632d0f97SHong Zhang     }
399632d0f97SHong Zhang   } while (k < nrqs);
400632d0f97SHong Zhang 
401632d0f97SHong Zhang   /* 4. Receive work done on other processors, then process */
402632d0f97SHong Zhang   /*--------------------------------------------------------*/
403632d0f97SHong Zhang   ierr = MPI_Waitall(nrqs,s_waits2,s_status);CHKERRQ(ierr);
404632d0f97SHong Zhang   k = 0;
405632d0f97SHong Zhang   do {
406632d0f97SHong Zhang     /* Receive messages */
407*c910923dSHong Zhang     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);
408632d0f97SHong Zhang     if (flag){
409632d0f97SHong Zhang       ierr = MPI_Get_count(&r_status,MPI_INT,&len);
410632d0f97SHong Zhang       proc_id = r_status.MPI_SOURCE;
411632d0f97SHong Zhang       ierr = ierr = PetscMalloc(len*sizeof(int),&indat);CHKERRQ(ierr);
412*c910923dSHong Zhang       ierr = MPI_Irecv(indat,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req);
413632d0f97SHong Zhang       printf(" [%d] recv %d msg from [%d]\n",rank,len,proc_id);
414632d0f97SHong Zhang 
415632d0f97SHong Zhang       /*  Process messages -- not done yet */
416632d0f97SHong Zhang 
417632d0f97SHong Zhang 
418632d0f97SHong Zhang       k++;
419632d0f97SHong Zhang       ierr = PetscFree(indat);CHKERRQ(ierr);
420632d0f97SHong Zhang     }
421632d0f97SHong Zhang   } while (k < nrqs);
422632d0f97SHong Zhang 
423*c910923dSHong Zhang   /* 5. Create new is[] */
424*c910923dSHong Zhang   /*--------------------*/
425*c910923dSHong Zhang   for (i=0; i<is_max; i++) {
426*c910923dSHong Zhang     nidx_i = nidx+i*Mbs;
427*c910923dSHong Zhang     ierr = ISCreateGeneral(PETSC_COMM_SELF,n[i],nidx_i,is+i);CHKERRQ(ierr);
428632d0f97SHong Zhang   }
429*c910923dSHong Zhang   ierr = PetscFree(nidx);CHKERRQ(ierr);
430632d0f97SHong Zhang 
431*c910923dSHong Zhang #ifdef OLD
432632d0f97SHong Zhang   ierr = PetscFree(onodes2);CHKERRQ(ierr);
433632d0f97SHong Zhang   ierr = PetscFree(olengths2);CHKERRQ(ierr);
434632d0f97SHong Zhang   ierr = PetscFree(rbuf2);CHKERRQ(ierr);
435*c910923dSHong Zhang 
436632d0f97SHong Zhang   ierr = PetscFree(table);CHKERRQ(ierr);
437632d0f97SHong Zhang   ierr = PetscFree(s_status);CHKERRQ(ierr);
438632d0f97SHong Zhang   ierr = PetscFree(recv_status);CHKERRQ(ierr);
439632d0f97SHong Zhang   ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
440632d0f97SHong Zhang   ierr = PetscFree(xdata);CHKERRQ(ierr);
441632d0f97SHong Zhang   ierr = PetscFree(isz1);CHKERRQ(ierr);
442632d0f97SHong Zhang #endif /* OLD */
443*c910923dSHong Zhang   ierr = PetscFree(idx);CHKERRQ(ierr);
444632d0f97SHong Zhang   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
445632d0f97SHong Zhang   ierr = PetscFree(s_waits2);CHKERRQ(ierr);
446632d0f97SHong Zhang   ierr = PetscFree(s_status);CHKERRQ(ierr);
447632d0f97SHong Zhang   PetscFunctionReturn(0);
448632d0f97SHong Zhang }
449632d0f97SHong Zhang 
450632d0f97SHong Zhang #undef __FUNCT__
451632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local"
452632d0f97SHong Zhang /*
453632d0f97SHong Zhang    MatIncreaseOverlap_MPISBAIJ_Local - Called by MatincreaseOverlap, to do
454632d0f97SHong Zhang        the work on the local processor.
455632d0f97SHong Zhang 
456632d0f97SHong Zhang      Inputs:
457632d0f97SHong Zhang       C      - MAT_MPISBAIJ;
458632d0f97SHong Zhang       imax - total no of index sets processed at a time;
459632d0f97SHong Zhang       table  - an array of char - size = Mbs bits.
460632d0f97SHong Zhang 
461632d0f97SHong Zhang      Output:
462632d0f97SHong Zhang       isz    - array containing the count of the solution elements correspondign
463632d0f97SHong Zhang                to each index set;
464632d0f97SHong Zhang       data   - pointer to the solutions
465632d0f97SHong Zhang */
466632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Local(Mat C,int imax,PetscBT *table,int *isz,int **data)
467632d0f97SHong Zhang {
468632d0f97SHong Zhang   Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data;
469632d0f97SHong Zhang   Mat         A = c->A,B = c->B;
470632d0f97SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
471632d0f97SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)B->data;
472632d0f97SHong Zhang   int         start,end,val,max,rstart,cstart,*ai,*aj;
473632d0f97SHong Zhang   int         *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
474632d0f97SHong Zhang   PetscBT     table_i;
475632d0f97SHong Zhang 
476632d0f97SHong Zhang   PetscFunctionBegin;
477632d0f97SHong Zhang   rstart = c->rstart;
478632d0f97SHong Zhang   cstart = c->cstart;
479632d0f97SHong Zhang   ai     = a->i;
480632d0f97SHong Zhang   aj     = a->j;
481632d0f97SHong Zhang   bi     = b->i;
482632d0f97SHong Zhang   bj     = b->j;
483632d0f97SHong Zhang   garray = c->garray;
484632d0f97SHong Zhang 
485632d0f97SHong Zhang 
486632d0f97SHong Zhang   for (i=0; i<imax; i++) {
487632d0f97SHong Zhang     data_i  = data[i];
488632d0f97SHong Zhang     table_i = table[i];
489632d0f97SHong Zhang     isz_i   = isz[i];
490632d0f97SHong Zhang     for (j=0,max=isz[i]; j<max; j++) {
491632d0f97SHong Zhang       row   = data_i[j] - rstart;
492632d0f97SHong Zhang       start = ai[row];
493632d0f97SHong Zhang       end   = ai[row+1];
494632d0f97SHong Zhang       for (k=start; k<end; k++) { /* Amat */
495632d0f97SHong Zhang         val = aj[k] + cstart;
496632d0f97SHong Zhang         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
497632d0f97SHong Zhang       }
498632d0f97SHong Zhang       start = bi[row];
499632d0f97SHong Zhang       end   = bi[row+1];
500632d0f97SHong Zhang       for (k=start; k<end; k++) { /* Bmat */
501632d0f97SHong Zhang         val = garray[bj[k]];
502632d0f97SHong Zhang         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
503632d0f97SHong Zhang       }
504632d0f97SHong Zhang     }
505632d0f97SHong Zhang     isz[i] = isz_i;
506632d0f97SHong Zhang   }
507632d0f97SHong Zhang   PetscFunctionReturn(0);
508632d0f97SHong Zhang }
509632d0f97SHong Zhang #undef __FUNCT__
510632d0f97SHong Zhang #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Receive"
511632d0f97SHong Zhang /*
512632d0f97SHong Zhang       MatIncreaseOverlap_MPISBAIJ_Receive - Process the recieved messages,
513632d0f97SHong Zhang          and return the output
514632d0f97SHong Zhang 
515632d0f97SHong Zhang          Input:
516632d0f97SHong Zhang            C    - the matrix
517632d0f97SHong Zhang            nrqr - no of messages being processed.
518632d0f97SHong Zhang            rbuf - an array of pointers to the recieved requests
519632d0f97SHong Zhang 
520632d0f97SHong Zhang          Output:
521632d0f97SHong Zhang            xdata - array of messages to be sent back
522632d0f97SHong Zhang            isz1  - size of each message
523632d0f97SHong Zhang 
524632d0f97SHong Zhang   For better efficiency perhaps we should malloc seperately each xdata[i],
525632d0f97SHong Zhang then if a remalloc is required we need only copy the data for that one row
526632d0f97SHong Zhang rather then all previous rows as it is now where a single large chunck of
527632d0f97SHong Zhang memory is used.
528632d0f97SHong Zhang 
529632d0f97SHong Zhang */
530632d0f97SHong Zhang static int MatIncreaseOverlap_MPISBAIJ_Receive(Mat C,int nrqr,int **rbuf,int **xdata,int * isz1)
531632d0f97SHong Zhang {
532632d0f97SHong Zhang   Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data;
533632d0f97SHong Zhang   Mat         A = c->A,B = c->B;
534632d0f97SHong Zhang   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)A->data;
535632d0f97SHong Zhang   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)B->data;
536632d0f97SHong Zhang   int         rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
537632d0f97SHong Zhang   int         row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
538632d0f97SHong Zhang   int         val,max1,max2,rank,Mbs,no_malloc =0,*tmp,new_estimate,ctr;
539632d0f97SHong Zhang   int         *rbuf_i,kmax,rbuf_0,ierr;
540632d0f97SHong Zhang   PetscBT     xtable;
541632d0f97SHong Zhang 
542632d0f97SHong Zhang   PetscFunctionBegin;
543632d0f97SHong Zhang   rank   = c->rank;
544632d0f97SHong Zhang   Mbs    = c->Mbs;
545632d0f97SHong Zhang   rstart = c->rstart;
546632d0f97SHong Zhang   cstart = c->cstart;
547632d0f97SHong Zhang   ai     = a->i;
548632d0f97SHong Zhang   aj     = a->j;
549632d0f97SHong Zhang   bi     = b->i;
550632d0f97SHong Zhang   bj     = b->j;
551632d0f97SHong Zhang   garray = c->garray;
552632d0f97SHong Zhang 
553632d0f97SHong Zhang 
554632d0f97SHong Zhang   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
555632d0f97SHong Zhang     rbuf_i  =  rbuf[i];
556632d0f97SHong Zhang     rbuf_0  =  rbuf_i[0];
557632d0f97SHong Zhang     ct     += rbuf_0;
558632d0f97SHong Zhang     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
559632d0f97SHong Zhang   }
560632d0f97SHong Zhang 
561632d0f97SHong Zhang   if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs;
562632d0f97SHong Zhang   else        max1 = 1;
563632d0f97SHong Zhang   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
564632d0f97SHong Zhang   ierr         = PetscMalloc(mem_estimate*sizeof(int),&xdata[0]);CHKERRQ(ierr);
565632d0f97SHong Zhang   ++no_malloc;
566632d0f97SHong Zhang   ierr         = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr);
567632d0f97SHong Zhang   ierr         = PetscMemzero(isz1,nrqr*sizeof(int));CHKERRQ(ierr);
568632d0f97SHong Zhang 
569632d0f97SHong Zhang   ct3 = 0;
570632d0f97SHong Zhang   for (i=0; i<nrqr; i++) { /* for easch mesg from proc i */
571632d0f97SHong Zhang     rbuf_i =  rbuf[i];
572632d0f97SHong Zhang     rbuf_0 =  rbuf_i[0];
573632d0f97SHong Zhang     ct1    =  2*rbuf_0+1;
574632d0f97SHong Zhang     ct2    =  ct1;
575632d0f97SHong Zhang     ct3    += ct1;
576632d0f97SHong Zhang     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
577632d0f97SHong Zhang       ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr);
578632d0f97SHong Zhang       oct2 = ct2;
579632d0f97SHong Zhang       kmax = rbuf_i[2*j];
580632d0f97SHong Zhang       for (k=0; k<kmax; k++,ct1++) {
581632d0f97SHong Zhang         row = rbuf_i[ct1];
582632d0f97SHong Zhang         if (!PetscBTLookupSet(xtable,row)) {
583632d0f97SHong Zhang           if (!(ct3 < mem_estimate)) {
584632d0f97SHong Zhang             new_estimate = (int)(1.5*mem_estimate)+1;
585632d0f97SHong Zhang             ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
586632d0f97SHong Zhang             ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
587632d0f97SHong Zhang             ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
588632d0f97SHong Zhang             xdata[0]     = tmp;
589632d0f97SHong Zhang             mem_estimate = new_estimate; ++no_malloc;
590632d0f97SHong Zhang             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
591632d0f97SHong Zhang           }
592632d0f97SHong Zhang           xdata[i][ct2++] = row;
593632d0f97SHong Zhang           ct3++;
594632d0f97SHong Zhang         }
595632d0f97SHong Zhang       }
596632d0f97SHong Zhang       for (k=oct2,max2=ct2; k<max2; k++)  {
597632d0f97SHong Zhang         row   = xdata[i][k] - rstart;
598632d0f97SHong Zhang         start = ai[row];
599632d0f97SHong Zhang         end   = ai[row+1];
600632d0f97SHong Zhang         for (l=start; l<end; l++) {
601632d0f97SHong Zhang           val = aj[l] + cstart;
602632d0f97SHong Zhang           if (!PetscBTLookupSet(xtable,val)) {
603632d0f97SHong Zhang             if (!(ct3 < mem_estimate)) {
604632d0f97SHong Zhang               new_estimate = (int)(1.5*mem_estimate)+1;
605632d0f97SHong Zhang               ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
606632d0f97SHong Zhang               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
607632d0f97SHong Zhang               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
608632d0f97SHong Zhang               xdata[0]     = tmp;
609632d0f97SHong Zhang               mem_estimate = new_estimate; ++no_malloc;
610632d0f97SHong Zhang               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
611632d0f97SHong Zhang             }
612632d0f97SHong Zhang             xdata[i][ct2++] = val;
613632d0f97SHong Zhang             ct3++;
614632d0f97SHong Zhang           }
615632d0f97SHong Zhang         }
616632d0f97SHong Zhang         start = bi[row];
617632d0f97SHong Zhang         end   = bi[row+1];
618632d0f97SHong Zhang         for (l=start; l<end; l++) {
619632d0f97SHong Zhang           val = garray[bj[l]];
620632d0f97SHong Zhang           if (!PetscBTLookupSet(xtable,val)) {
621632d0f97SHong Zhang             if (!(ct3 < mem_estimate)) {
622632d0f97SHong Zhang               new_estimate = (int)(1.5*mem_estimate)+1;
623632d0f97SHong Zhang               ierr = PetscMalloc(new_estimate * sizeof(int),&tmp);CHKERRQ(ierr);
624632d0f97SHong Zhang               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(int));CHKERRQ(ierr);
625632d0f97SHong Zhang               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
626632d0f97SHong Zhang               xdata[0]     = tmp;
627632d0f97SHong Zhang               mem_estimate = new_estimate; ++no_malloc;
628632d0f97SHong Zhang               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
629632d0f97SHong Zhang             }
630632d0f97SHong Zhang             xdata[i][ct2++] = val;
631632d0f97SHong Zhang             ct3++;
632632d0f97SHong Zhang           }
633632d0f97SHong Zhang         }
634632d0f97SHong Zhang       }
635632d0f97SHong Zhang       /* Update the header*/
636632d0f97SHong Zhang       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
637632d0f97SHong Zhang       xdata[i][2*j-1] = rbuf_i[2*j-1];
638632d0f97SHong Zhang     }
639632d0f97SHong Zhang     xdata[i][0] = rbuf_0;
640632d0f97SHong Zhang     xdata[i+1]  = xdata[i] + ct2;
641632d0f97SHong Zhang     isz1[i]     = ct2; /* size of each message */
642632d0f97SHong Zhang   }
643632d0f97SHong Zhang   ierr = PetscBTDestroy(xtable);CHKERRQ(ierr);
644632d0f97SHong Zhang   PetscLogInfo(0,"MatIncreaseOverlap_MPISBAIJ:[%d] Allocated %d bytes, required %d, no of mallocs = %d\n",rank,mem_estimate,ct3,no_malloc);
645632d0f97SHong Zhang   PetscFunctionReturn(0);
646632d0f97SHong Zhang }
647632d0f97SHong Zhang 
648632d0f97SHong Zhang 
649