xref: /petsc/src/mat/impls/baij/mpi/baijov.c (revision c07bf0741b2e8ac3bb29c3d024f5883d65444abc)
1 #define PETSCMAT_DLL
2 
3 
4 /*
5    Routines to compute overlapping regions of a parallel MPI matrix
6   and to find submatrices that are shared across processors.
7 */
8 #include "../src/mat/impls/baij/mpi/mpibaij.h"
9 #include "petscbt.h"
10 
11 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat,PetscInt,IS *);
12 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat,PetscInt,char **,PetscInt*,PetscInt**);
13 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat,PetscInt,PetscInt **,PetscInt**,PetscInt*);
14 EXTERN PetscErrorCode MatGetRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
15 EXTERN PetscErrorCode MatRestoreRow_MPIBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ"
19 PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat C,PetscInt imax,IS is[],PetscInt ov)
20 {
21   PetscErrorCode ierr;
22   PetscInt       i,N=C->cmap->N, bs=C->rmap->bs;
23   IS             *is_new;
24 
25   PetscFunctionBegin;
26   ierr = PetscMalloc(imax*sizeof(IS),&is_new);CHKERRQ(ierr);
27   /* Convert the indices into block format */
28   ierr = ISCompressIndicesGeneral(N,bs,imax,is,is_new);CHKERRQ(ierr);
29   if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}
30   for (i=0; i<ov; ++i) {
31     ierr = MatIncreaseOverlap_MPIBAIJ_Once(C,imax,is_new);CHKERRQ(ierr);
32   }
33   for (i=0; i<imax; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);}
34   ierr = ISExpandIndicesGeneral(N,bs,imax,is_new,is);CHKERRQ(ierr);
35   for (i=0; i<imax; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);}
36   ierr = PetscFree(is_new);CHKERRQ(ierr);
37   PetscFunctionReturn(0);
38 }
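/*
  Usage sketch (illustrative only, with a hypothetical matrix C and index set
  list is[]): the implementation above is normally reached through PETSc's
  public entry point, e.g.

      IS is[1];
      ... fill is[0] with the rows of interest ...
      ierr = MatIncreaseOverlap(C,1,is,2);CHKERRQ(ierr);   (grow each set by an overlap of 2)

  Internally the point indices are compressed to block indices, grown ov times
  by MatIncreaseOverlap_MPIBAIJ_Once(), and expanded back to point indices.
*/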
39 
40 /*
41   Sample message format:
42   If a processor A wants processor B to process some elements corresponding
43   to index sets is[1], is[5]
44   mesg [0] = 2   (no of index sets in the mesg)
45   -----------
46   mesg [1] = 1 => is[1]
47   mesg [2] = sizeof(is[1]);
48   -----------
49   mesg [3] = 5  => is[5]
50   mesg [4] = sizeof(is[5]);
51   -----------
52   mesg [5]
53   mesg [n]  data(is[1])
54   -----------
55   mesg[n+1]
56   mesg[m]  data(is[5])
57   -----------
58 
59   Notes:
60   nrqs - no of requests sent (or to be sent out)
61   nrqr - no of requests received (which have to be, or which have been, processed)
62 */
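/*
  Illustrative example (not in the original comment, just one concrete instance
  of the format above): if is[1] holds the block rows {10,11,12} and is[5]
  holds {40,41}, the outgoing buffer would be

      mesg[] = { 2,  1,3,  5,2,  10,11,12,  40,41 }

  i.e. a header of length 1 + 2*(no of index sets) followed by the row data.
*/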
63 #undef __FUNCT__
64 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Once"
65 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Once(Mat C,PetscInt imax,IS is[])
66 {
67   Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
68   const PetscInt **idx,*idx_i;
69   PetscInt       *n,*w3,*w4,*rtable,**data,len;
70   PetscErrorCode ierr;
71   PetscMPIInt    size,rank,tag1,tag2,*w2,*w1,nrqr;
72   PetscInt       Mbs,i,j,k,**rbuf,row,proc,nrqs,msz,**outdat,**ptr;
73   PetscInt       *ctr,*pa,*tmp,*isz,*isz1,**xdata,**rbuf2;
74   PetscMPIInt    *onodes1,*olengths1,*onodes2,*olengths2;
75   PetscBT        *table;
76   MPI_Comm       comm;
77   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2;
78   MPI_Status     *s_status,*recv_status;
79 
80   PetscFunctionBegin;
81   comm   = ((PetscObject)C)->comm;
82   size   = c->size;
83   rank   = c->rank;
84   Mbs    = c->Mbs;
85 
86   ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
87   ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
88 
89   ierr = PetscMalloc3(imax+1,const PetscInt*,&idx,imax,PetscInt,&n,Mbs,PetscInt,&rtable);CHKERRQ(ierr);
90   for (i=0; i<imax; i++) {
91     ierr = ISGetIndices(is[i],&idx[i]);CHKERRQ(ierr);
92     ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
93   }
94 
95   /* Create hash table for the mapping: row -> proc */
96   for (i=0,j=0; i<size; i++) {
97     len = c->rangebs[i+1];
98     for (; j<len; j++) {
99       rtable[j] = i;
100     }
101   }
102 
103   /* evaluate communication - mesg to whom, length of mesg, and buffer space
104      required. Based on this, buffers are allocated, and data copied into them */
105   ierr = PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
106   ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
107   ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
108   ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr);
109   for (i=0; i<imax; i++) {
110     ierr  = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/
111     idx_i = idx[i];
112     len   = n[i];
113     for (j=0; j<len; j++) {
114       row  = idx_i[j];
115       if (row < 0) {
116         SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index set cannot have negative entries");
117       }
118       proc = rtable[row];
119       w4[proc]++;
120     }
121     for (j=0; j<size; j++){
122       if (w4[j]) { w1[j] += w4[j]; w3[j]++;}
123     }
124   }
125 
126   nrqs     = 0;              /* no of outgoing messages */
127   msz      = 0;              /* total mesg length (for all procs) */
128   w1[rank] = 0;              /* no mesg sent to itself */
129   w3[rank] = 0;
130   for (i=0; i<size; i++) {
131     if (w1[i])  {w2[i] = 1; nrqs++;} /* there exists a message to proc i */
132   }
133   /* pa - is list of processors to communicate with */
134   ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr);
135   for (i=0,j=0; i<size; i++) {
136     if (w1[i]) {pa[j] = i; j++;}
137   }
138 
139   /* Each message would have a header = 1 + 2*(no of IS) + data */
140   for (i=0; i<nrqs; i++) {
141     j      = pa[i];
142     w1[j] += w2[j] + 2*w3[j];
143     msz   += w1[j];
144   }
145 
146   /* Determine the number of messages to expect, their lengths, and from-ids */
147   ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr);
148   ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr);
149 
150   /* Now post the Irecvs corresponding to these messages */
151   ierr = PetscPostIrecvInt(comm,tag1,nrqr,onodes1,olengths1,&rbuf,&r_waits1);CHKERRQ(ierr);
152 
153   /* Allocate Memory for outgoing messages */
154   ierr = PetscMalloc4(size,PetscInt*,&outdat,size,PetscInt*,&ptr,msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
155   ierr = PetscMemzero(outdat,size*sizeof(PetscInt*));CHKERRQ(ierr);
156   ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
157   {
158     PetscInt *iptr = tmp,ict  = 0;
159     for (i=0; i<nrqs; i++) {
160       j         = pa[i];
161       iptr     +=  ict;
162       outdat[j] = iptr;
163       ict       = w1[j];
164     }
165   }
166 
167   /* Form the outgoing messages */
168   /*plug in the headers*/
169   for (i=0; i<nrqs; i++) {
170     j            = pa[i];
171     outdat[j][0] = 0;
172     ierr = PetscMemzero(outdat[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
173     ptr[j]       = outdat[j] + 2*w3[j] + 1;
174   }
175 
176   /* Memory for doing local proc's work*/
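  /* Layout note (describing the single allocation below, as read from the code):
     the block is carved into imax PetscBT handles (table), imax PetscInt* pointers
     (data), imax counters (isz), imax runs of Mbs indices backing the data arrays,
     and imax bit tables of Mbs bits each (rounded up to whole bytes) backing the
     PetscBT lookups. */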
177   {
178     PetscInt  *d_p;
179     char      *t_p;
180 
181     /* should change to use PetscMallocN() */
182     ierr = PetscMalloc((imax)*(sizeof(PetscBT) + sizeof(PetscInt*)+ sizeof(PetscInt)) +
183       (Mbs)*imax*sizeof(PetscInt)  + (Mbs/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char),&table);CHKERRQ(ierr);
184     ierr = PetscMemzero(table,(imax)*(sizeof(PetscBT) + sizeof(PetscInt*)+ sizeof(PetscInt)) +
185       (Mbs)*imax*sizeof(PetscInt)  + (Mbs/PETSC_BITS_PER_BYTE+1)*imax*sizeof(char));CHKERRQ(ierr);
186     data = (PetscInt **)(table + imax);
187     isz  = (PetscInt  *)(data  + imax);
188     d_p  = (PetscInt  *)(isz   + imax);
189     t_p  = (char *)(d_p   + Mbs*imax);
190     for (i=0; i<imax; i++) {
191       table[i] = t_p + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
192       data[i]  = d_p + (Mbs)*i;
193     }
194   }
195 
196   /* Parse the IS and update local tables and the outgoing buf with the data*/
197   {
198     PetscInt n_i,*data_i,isz_i,*outdat_j,ctr_j;
199     PetscBT  table_i;
200 
201     for (i=0; i<imax; i++) {
202       ierr    = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
203       n_i     = n[i];
204       table_i = table[i];
205       idx_i   = idx[i];
206       data_i  = data[i];
207       isz_i   = isz[i];
208       for (j=0;  j<n_i; j++) {  /* parse the indices of each IS */
209         row  = idx_i[j];
210         proc = rtable[row];
211         if (proc != rank) { /* copy to the outgoing buffer */
212           ctr[proc]++;
213           *ptr[proc] = row;
214           ptr[proc]++;
215         } else { /* Update the local table */
216           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
217         }
218       }
219       /* Update the headers for the current IS */
220       for (j=0; j<size; j++) { /* Can Optimise this loop by using pa[] */
221         if ((ctr_j = ctr[j])) {
222           outdat_j        = outdat[j];
223           k               = ++outdat_j[0];
224           outdat_j[2*k]   = ctr_j;
225           outdat_j[2*k-1] = i;
226         }
227       }
228       isz[i] = isz_i;
229     }
230   }
231 
232   /*  Now  post the sends */
233   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
234   for (i=0; i<nrqs; ++i) {
235     j    = pa[i];
236     ierr = MPI_Isend(outdat[j],w1[j],MPIU_INT,j,tag1,comm,s_waits1+i);CHKERRQ(ierr);
237   }
238 
239   /* No longer need the original indices*/
240   for (i=0; i<imax; ++i) {
241     ierr = ISRestoreIndices(is[i],idx+i);CHKERRQ(ierr);
242   }
243   ierr = PetscFree3(idx,n,rtable);CHKERRQ(ierr);
244 
245   for (i=0; i<imax; ++i) {
246     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
247   }
248 
249   /* Do Local work*/
250   ierr = MatIncreaseOverlap_MPIBAIJ_Local(C,imax,table,isz,data);CHKERRQ(ierr);
251 
252   /* Receive messages*/
253   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&recv_status);CHKERRQ(ierr);
254   if (nrqr) {ierr = MPI_Waitall(nrqr,r_waits1,recv_status);CHKERRQ(ierr);}
255 
256   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
257   if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);}
258 
259   /* Phase 1 sends are complete - deallocate buffers */
260   ierr = PetscFree4(outdat,ptr,tmp,ctr);CHKERRQ(ierr);
261   ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr);
262 
263   ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&xdata);CHKERRQ(ierr);
264   ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt),&isz1);CHKERRQ(ierr);
265   ierr = MatIncreaseOverlap_MPIBAIJ_Receive(C,nrqr,rbuf,xdata,isz1);CHKERRQ(ierr);
266   ierr = PetscFree(rbuf[0]);CHKERRQ(ierr);
267   ierr = PetscFree(rbuf);CHKERRQ(ierr);
268 
269   /* Send the data back*/
270   /* Do a global reduction to know the buffer space req for incoming messages*/
271   {
272     PetscMPIInt *rw1;
273 
274     ierr = PetscMalloc(size*sizeof(PetscInt),&rw1);CHKERRQ(ierr);
275     ierr = PetscMemzero(rw1,size*sizeof(PetscInt));CHKERRQ(ierr);
276 
277     for (i=0; i<nrqr; ++i) {
278       proc      = recv_status[i].MPI_SOURCE;
279       if (proc != onodes1[i]) SETERRQ(PETSC_ERR_PLIB,"MPI_SOURCE mismatch");
280       rw1[proc] = isz1[i];
281     }
282 
283     ierr = PetscFree(onodes1);CHKERRQ(ierr);
284     ierr = PetscFree(olengths1);CHKERRQ(ierr);
285 
286     /* Determine the number of messages to expect, their lengths, and from-ids */
287     ierr = PetscGatherMessageLengths(comm,nrqr,nrqs,rw1,&onodes2,&olengths2);CHKERRQ(ierr);
288     ierr = PetscFree(rw1);CHKERRQ(ierr);
289   }
290   /* Now post the Irecvs corresponding to these messages */
291   ierr = PetscPostIrecvInt(comm,tag2,nrqs,onodes2,olengths2,&rbuf2,&r_waits2);CHKERRQ(ierr);
292 
293   /*  Now  post the sends */
294   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
295   for (i=0; i<nrqr; ++i) {
296     j    = recv_status[i].MPI_SOURCE;
297     ierr = MPI_Isend(xdata[i],isz1[i],MPIU_INT,j,tag2,comm,s_waits2+i);CHKERRQ(ierr);
298   }
299 
300   /* receive work done on other processors*/
301   {
302     PetscMPIInt idex;
303     PetscInt    is_no,ct1,max,*rbuf2_i,isz_i,*data_i,jmax;
304     PetscBT     table_i;
305     MPI_Status  *status2;
306 
307     ierr = PetscMalloc((PetscMax(nrqr,nrqs)+1)*sizeof(MPI_Status),&status2);CHKERRQ(ierr);
308     for (i=0; i<nrqs; ++i) {
309       ierr = MPI_Waitany(nrqs,r_waits2,&idex,status2+i);CHKERRQ(ierr);
310       /* Process the message*/
311       rbuf2_i = rbuf2[idex];
312       ct1     = 2*rbuf2_i[0]+1;
313       jmax    = rbuf2[idex][0];
314       for (j=1; j<=jmax; j++) {
315         max     = rbuf2_i[2*j];
316         is_no   = rbuf2_i[2*j-1];
317         isz_i   = isz[is_no];
318         data_i  = data[is_no];
319         table_i = table[is_no];
320         for (k=0; k<max; k++,ct1++) {
321           row = rbuf2_i[ct1];
322           if (!PetscBTLookupSet(table_i,row)) { data_i[isz_i++] = row;}
323         }
324         isz[is_no] = isz_i;
325       }
326     }
327     if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,status2);CHKERRQ(ierr);}
328     ierr = PetscFree(status2);CHKERRQ(ierr);
329   }
330 
331   for (i=0; i<imax; ++i) {
332     ierr = ISCreateGeneral(PETSC_COMM_SELF,isz[i],data[i],is+i);CHKERRQ(ierr);
333   }
334 
335 
336   ierr = PetscFree(onodes2);CHKERRQ(ierr);
337   ierr = PetscFree(olengths2);CHKERRQ(ierr);
338 
339   ierr = PetscFree(pa);CHKERRQ(ierr);
340   ierr = PetscFree(rbuf2[0]);CHKERRQ(ierr);
341   ierr = PetscFree(rbuf2);CHKERRQ(ierr);
342   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
343   ierr = PetscFree(r_waits1);CHKERRQ(ierr);
344   ierr = PetscFree(s_waits2);CHKERRQ(ierr);
345   ierr = PetscFree(r_waits2);CHKERRQ(ierr);
346   ierr = PetscFree(table);CHKERRQ(ierr);
347   ierr = PetscFree(s_status);CHKERRQ(ierr);
348   ierr = PetscFree(recv_status);CHKERRQ(ierr);
349   ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
350   ierr = PetscFree(xdata);CHKERRQ(ierr);
351   ierr = PetscFree(isz1);CHKERRQ(ierr);
352   PetscFunctionReturn(0);
353 }
354 
355 #undef __FUNCT__
356 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Local"
357 /*
358    MatIncreaseOverlap_MPIBAIJ_Local - Called by MatIncreaseOverlap, to do
359        the work on the local processor.
360 
361      Inputs:
362       C      - MAT_MPIBAIJ;
363       imax - total no of index sets processed at a time;
364       table  - an array of PetscBT bit tables, each of size Mbs bits.
365 
366      Output:
367       isz    - array containing the count of the solution elements corresponding
368                to each index set;
369       data   - pointer to the solutions
370 */
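/*
   Summary of the loop below (as read from the code): for each index set, every
   block row already in the set is scanned and the block-column indices of that
   row are added - the diagonal part from A (offset by cstart) and the
   off-diagonal part from B translated through garray[] - with the per-set bit
   table preventing duplicate insertions.
*/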
371 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Local(Mat C,PetscInt imax,PetscBT *table,PetscInt *isz,PetscInt **data)
372 {
373   Mat_MPIBAIJ *c = (Mat_MPIBAIJ*)C->data;
374   Mat         A = c->A,B = c->B;
375   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
376   PetscInt    start,end,val,max,rstart,cstart,*ai,*aj;
377   PetscInt    *bi,*bj,*garray,i,j,k,row,*data_i,isz_i;
378   PetscBT     table_i;
379 
380   PetscFunctionBegin;
381   rstart = c->rstartbs;
382   cstart = c->cstartbs;
383   ai     = a->i;
384   aj     = a->j;
385   bi     = b->i;
386   bj     = b->j;
387   garray = c->garray;
388 
389 
390   for (i=0; i<imax; i++) {
391     data_i  = data[i];
392     table_i = table[i];
393     isz_i   = isz[i];
394     for (j=0,max=isz[i]; j<max; j++) {
395       row   = data_i[j] - rstart;
396       start = ai[row];
397       end   = ai[row+1];
398       for (k=start; k<end; k++) { /* Amat */
399         val = aj[k] + cstart;
400         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
401       }
402       start = bi[row];
403       end   = bi[row+1];
404       for (k=start; k<end; k++) { /* Bmat */
405         val = garray[bj[k]];
406         if (!PetscBTLookupSet(table_i,val)) { data_i[isz_i++] = val;}
407       }
408     }
409     isz[i] = isz_i;
410   }
411   PetscFunctionReturn(0);
412 }
413 #undef __FUNCT__
414 #define __FUNCT__ "MatIncreaseOverlap_MPIBAIJ_Receive"
415 /*
416       MatIncreaseOverlap_MPIBAIJ_Receive - Process the received messages,
417          and return the output
418 
419          Input:
420            C    - the matrix
421            nrqr - no of messages being processed.
422            rbuf - an array of pointers to the received requests
423 
424          Output:
425            xdata - array of messages to be sent back
426            isz1  - size of each message
427 
428   For better efficiency we should perhaps malloc each xdata[i] separately;
429 then, if a remalloc is required, we would only need to copy the data for that
430 one row rather than for all previous rows, as happens now where a single
431 large chunk of memory is used.
432 
433 */
434 static PetscErrorCode MatIncreaseOverlap_MPIBAIJ_Receive(Mat C,PetscInt nrqr,PetscInt **rbuf,PetscInt **xdata,PetscInt * isz1)
435 {
436   Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
437   Mat            A = c->A,B = c->B;
438   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)B->data;
439   PetscErrorCode ierr;
440   PetscInt       rstart,cstart,*ai,*aj,*bi,*bj,*garray,i,j,k;
441   PetscInt       row,total_sz,ct,ct1,ct2,ct3,mem_estimate,oct2,l,start,end;
442   PetscInt       val,max1,max2,Mbs,no_malloc =0,*tmp,new_estimate,ctr;
443   PetscInt       *rbuf_i,kmax,rbuf_0;
444   PetscBT        xtable;
445   PetscMPIInt    rank;
446 
447   PetscFunctionBegin;
448   rank   = c->rank;
449   Mbs    = c->Mbs;
450   rstart = c->rstartbs;
451   cstart = c->cstartbs;
452   ai     = a->i;
453   aj     = a->j;
454   bi     = b->i;
455   bj     = b->j;
456   garray = c->garray;
457 
458 
459   for (i=0,ct=0,total_sz=0; i<nrqr; ++i) {
460     rbuf_i  =  rbuf[i];
461     rbuf_0  =  rbuf_i[0];
462     ct     += rbuf_0;
463     for (j=1; j<=rbuf_0; j++) { total_sz += rbuf_i[2*j]; }
464   }
465 
466   if (c->Mbs) max1 = ct*(a->nz +b->nz)/c->Mbs;
467   else        max1 = 1;
468   mem_estimate = 3*((total_sz > max1 ? total_sz : max1)+1);
469   ierr         = PetscMalloc(mem_estimate*sizeof(PetscInt),&xdata[0]);CHKERRQ(ierr);
470   ++no_malloc;
471   ierr         = PetscBTCreate(Mbs,xtable);CHKERRQ(ierr);
472   ierr         = PetscMemzero(isz1,nrqr*sizeof(PetscInt));CHKERRQ(ierr);
473 
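  /* Note on the loop below (describing the existing code): all replies share the
     single buffer xdata[0]; mem_estimate is only a guess, so whenever the running
     count ct3 reaches it the buffer is regrown by a factor of 1.5 and the xdata[i]
     pointers are rebased onto the new allocation using the sizes stored in isz1[]. */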
474   ct3 = 0;
475   for (i=0; i<nrqr; i++) { /* for each mesg from proc i */
476     rbuf_i =  rbuf[i];
477     rbuf_0 =  rbuf_i[0];
478     ct1    =  2*rbuf_0+1;
479     ct2    =  ct1;
480     ct3    += ct1;
481     for (j=1; j<=rbuf_0; j++) { /* for each IS from proc i*/
482       ierr = PetscBTMemzero(Mbs,xtable);CHKERRQ(ierr);
483       oct2 = ct2;
484       kmax = rbuf_i[2*j];
485       for (k=0; k<kmax; k++,ct1++) {
486         row = rbuf_i[ct1];
487         if (!PetscBTLookupSet(xtable,row)) {
488           if (!(ct3 < mem_estimate)) {
489             new_estimate = (PetscInt)(1.5*mem_estimate)+1;
490             ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
491             ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
492             ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
493             xdata[0]     = tmp;
494             mem_estimate = new_estimate; ++no_malloc;
495             for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
496           }
497           xdata[i][ct2++] = row;
498           ct3++;
499         }
500       }
501       for (k=oct2,max2=ct2; k<max2; k++)  {
502         row   = xdata[i][k] - rstart;
503         start = ai[row];
504         end   = ai[row+1];
505         for (l=start; l<end; l++) {
506           val = aj[l] + cstart;
507           if (!PetscBTLookupSet(xtable,val)) {
508             if (!(ct3 < mem_estimate)) {
509               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
510               ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
511               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
512               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
513               xdata[0]     = tmp;
514               mem_estimate = new_estimate; ++no_malloc;
515               for (ctr=1; ctr<=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
516             }
517             xdata[i][ct2++] = val;
518             ct3++;
519           }
520         }
521         start = bi[row];
522         end   = bi[row+1];
523         for (l=start; l<end; l++) {
524           val = garray[bj[l]];
525           if (!PetscBTLookupSet(xtable,val)) {
526             if (!(ct3 < mem_estimate)) {
527               new_estimate = (PetscInt)(1.5*mem_estimate)+1;
528               ierr = PetscMalloc(new_estimate * sizeof(PetscInt),&tmp);CHKERRQ(ierr);
529               ierr = PetscMemcpy(tmp,xdata[0],mem_estimate*sizeof(PetscInt));CHKERRQ(ierr);
530               ierr = PetscFree(xdata[0]);CHKERRQ(ierr);
531               xdata[0]     = tmp;
532               mem_estimate = new_estimate; ++no_malloc;
533               for (ctr =1; ctr <=i; ctr++) { xdata[ctr] = xdata[ctr-1] + isz1[ctr-1];}
534             }
535             xdata[i][ct2++] = val;
536             ct3++;
537           }
538         }
539       }
540       /* Update the header*/
541       xdata[i][2*j]   = ct2 - oct2; /* Undo the vector isz1 and use only a var*/
542       xdata[i][2*j-1] = rbuf_i[2*j-1];
543     }
544     xdata[i][0] = rbuf_0;
545     xdata[i+1]  = xdata[i] + ct2;
546     isz1[i]     = ct2; /* size of each message */
547   }
548   ierr = PetscBTDestroy(xtable);CHKERRQ(ierr);
549   ierr = PetscInfo3(C,"Allocated %D bytes, required %D, no of mallocs = %D\n",mem_estimate,ct3,no_malloc);CHKERRQ(ierr);
550   PetscFunctionReturn(0);
551 }
552 
553 static PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *);
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ"
557 PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submat[])
558 {
559   IS             *isrow_new,*iscol_new;
560   Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
561   PetscErrorCode ierr;
562   PetscInt       nmax,nstages_local,nstages,i,pos,max_no,N=C->cmap->N,bs=C->rmap->bs;
563 
564   PetscFunctionBegin;
565   /* The compression and expansion should be avoided. It does not point
566      out errors, and it might change the indices, hence it is potentially buggy */
567 
568   ierr = PetscMalloc2(ismax+1,IS,&isrow_new,ismax+1,IS,&iscol_new);CHKERRQ(ierr);
569   ierr = ISCompressIndicesGeneral(N,bs,ismax,isrow,isrow_new);CHKERRQ(ierr);
570   ierr = ISCompressIndicesGeneral(N,bs,ismax,iscol,iscol_new);CHKERRQ(ierr);
571 
572   /* Allocate memory to hold all the submatrices */
573   if (scall != MAT_REUSE_MATRIX) {
574     ierr = PetscMalloc((ismax+1)*sizeof(Mat),submat);CHKERRQ(ierr);
575   }
576   /* Determine the number of stages through which submatrices are done */
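  /* As read from the line below: nmax caps how many index sets are handled per
     stage so that the per-stage column-map workspace (nmax * Nbs integers) stays
     near 20*1000000 bytes; the 20 MB figure is an in-code heuristic, not a
     documented constant. */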
577   nmax          = 20*1000000 / (c->Nbs * sizeof(PetscInt));
578   if (!nmax) nmax = 1;
579   nstages_local = ismax/nmax + ((ismax % nmax)?1:0);
580 
581   /* Make sure every processor loops through the nstages */
582   ierr = MPI_Allreduce(&nstages_local,&nstages,1,MPIU_INT,MPI_MAX,((PetscObject)C)->comm);CHKERRQ(ierr);
583   for (i=0,pos=0; i<nstages; i++) {
584     if (pos+nmax <= ismax) max_no = nmax;
585     else if (pos == ismax) max_no = 0;
586     else                   max_no = ismax-pos;
587     ierr = MatGetSubMatrices_MPIBAIJ_local(C,max_no,isrow_new+pos,iscol_new+pos,scall,*submat+pos);CHKERRQ(ierr);
588     pos += max_no;
589   }
590 
591   for (i=0; i<ismax; i++) {
592     ierr = ISDestroy(isrow_new[i]);CHKERRQ(ierr);
593     ierr = ISDestroy(iscol_new[i]);CHKERRQ(ierr);
594   }
595   ierr = PetscFree2(isrow_new,iscol_new);CHKERRQ(ierr);
596   PetscFunctionReturn(0);
597 }
598 
599 #if defined (PETSC_USE_CTABLE)
600 #undef __FUNCT__
601 #define __FUNCT__ "PetscGetProc"
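/*
   PetscGetProc - finds the process that owns global block row `row' (a short
   description added for clarity, based on the code below): the owner is first
   guessed by linear interpolation over the total number of block rows and then
   corrected by walking the proc_gnode[] ownership ranges up or down.
*/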
602 PetscErrorCode PetscGetProc(const PetscInt row, const PetscMPIInt size, const PetscInt proc_gnode[], PetscMPIInt *rank)
603 {
604   PetscInt    nGlobalNd = proc_gnode[size];
605   PetscMPIInt fproc = PetscMPIIntCast( (PetscInt)(((float)row * (float)size / (float)nGlobalNd + 0.5)));
606 
607   PetscFunctionBegin;
608   if (fproc > size) fproc = size;
609   while (row < proc_gnode[fproc] || row >= proc_gnode[fproc+1]) {
610     if (row < proc_gnode[fproc]) fproc--;
611     else                         fproc++;
612   }
613   *rank = fproc;
614   PetscFunctionReturn(0);
615 }
616 #endif
617 
618 /* -------------------------------------------------------------------------*/
619 /* This code is used for BAIJ and SBAIJ matrices (unfortunate dependency) */
620 #undef __FUNCT__
621 #define __FUNCT__ "MatGetSubMatrices_MPIBAIJ_local"
622 static PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat C,PetscInt ismax,const IS isrow[],const IS iscol[],MatReuse scall,Mat *submats)
623 {
624   Mat_MPIBAIJ    *c = (Mat_MPIBAIJ*)C->data;
625   Mat            A = c->A;
626   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data,*b = (Mat_SeqBAIJ*)c->B->data,*mat;
627   const PetscInt **irow,**icol,*irow_i;
628   PetscInt       *nrow,*ncol,*w3,*w4,start;
629   PetscErrorCode ierr;
630   PetscMPIInt    size,tag0,tag1,tag2,tag3,*w1,*w2,nrqr,idex,end,proc;
631   PetscInt       **sbuf1,**sbuf2,rank,i,j,k,l,ct1,ct2,**rbuf1,row;
632   PetscInt       nrqs,msz,**ptr,*req_size,*ctr,*pa,*tmp,tcol;
633   PetscInt       **rbuf3,*req_source,**sbuf_aj,**rbuf2,max1,max2;
634   PetscInt       **lens,is_no,ncols,*cols,mat_i,*mat_j,tmp2,jmax;
635   PetscInt       ctr_j,*sbuf1_j,*sbuf_aj_i,*rbuf1_i,kmax,*lens_i;
636   PetscInt       bs=C->rmap->bs,bs2=c->bs2,*a_j=a->j,*b_j=b->j,*cworkA,*cworkB;
637   PetscInt       cstart = c->cstartbs,nzA,nzB,*a_i=a->i,*b_i=b->i,imark;
638   PetscInt       *bmap = c->garray,ctmp,rstart=c->rstartbs;
639   MPI_Request    *s_waits1,*r_waits1,*s_waits2,*r_waits2,*r_waits3;
640   MPI_Request    *r_waits4,*s_waits3,*s_waits4;
641   MPI_Status     *r_status1,*r_status2,*s_status1,*s_status3,*s_status2;
642   MPI_Status     *r_status3,*r_status4,*s_status4;
643   MPI_Comm       comm;
644   MatScalar      **rbuf4,**sbuf_aa,*vals,*mat_a,*sbuf_aa_i,*vworkA,*vworkB;
645   MatScalar      *a_a=a->a,*b_a=b->a;
646   PetscTruth     flag;
647   PetscMPIInt    *onodes1,*olengths1;
648 
649 #if defined (PETSC_USE_CTABLE)
650   PetscInt       tt;
651   PetscTable     *rowmaps,*colmaps,lrow1_grow1,lcol1_gcol1;
652 #else
653   PetscInt       **cmap,*cmap_i,*rtable,*rmap_i,**rmap, Mbs = c->Mbs;
654 #endif
655 
656   PetscFunctionBegin;
657   comm   = ((PetscObject)C)->comm;
658   tag0   = ((PetscObject)C)->tag;
659   size   = c->size;
660   rank   = c->rank;
661 
662   /* Get some new tags to keep the communication clean */
663   ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
664   ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
665   ierr = PetscObjectGetNewTag((PetscObject)C,&tag3);CHKERRQ(ierr);
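  /* Rough roadmap of the communication below (summarizing the existing code):
     tag0 carries the row requests (sbuf1/rbuf1), tag1 returns the reply sizes
     (sbuf2/rbuf2), tag2 returns the column indices (sbuf_aj/rbuf3), and tag3
     returns the matrix values (sbuf_aa/rbuf4). */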
666 
667 #if defined(PETSC_USE_CTABLE)
668   ierr = PetscMalloc4(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol);CHKERRQ(ierr);
669 #else
670   ierr = PetscMalloc5(ismax,const PetscInt*,&irow,ismax,const PetscInt*,&icol,ismax,PetscInt,&nrow,ismax,PetscInt,&ncol,Mbs+1,PetscInt,&rtable);CHKERRQ(ierr);
671   /* Create hash table for the mapping: row -> proc */
672   for (i=0,j=0; i<size; i++) {
673     jmax = c->rowners[i+1];
674     for (; j<jmax; j++) {
675       rtable[j] = i;
676     }
677   }
678 #endif
679 
680   for (i=0; i<ismax; i++) {
681     ierr = ISGetIndices(isrow[i],&irow[i]);CHKERRQ(ierr);
682     ierr = ISGetIndices(iscol[i],&icol[i]);CHKERRQ(ierr);
683     ierr = ISGetLocalSize(isrow[i],&nrow[i]);CHKERRQ(ierr);
684     ierr = ISGetLocalSize(iscol[i],&ncol[i]);CHKERRQ(ierr);
685   }
686 
687   /* evaluate communication - mesg to whom, length of mesg, and buffer space
688      required. Based on this, buffers are allocated, and data copied into them */
689   ierr = PetscMalloc4(size,PetscMPIInt,&w1,size,PetscMPIInt,&w2,size,PetscInt,&w3,size,PetscInt,&w4);CHKERRQ(ierr);
690   ierr = PetscMemzero(w1,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
691   ierr = PetscMemzero(w2,size*sizeof(PetscMPIInt));CHKERRQ(ierr);
692   ierr = PetscMemzero(w3,size*sizeof(PetscInt));CHKERRQ(ierr);
693   for (i=0; i<ismax; i++) {
694     ierr   = PetscMemzero(w4,size*sizeof(PetscInt));CHKERRQ(ierr); /* initialise work vector*/
695     jmax   = nrow[i];
696     irow_i = irow[i];
697     for (j=0; j<jmax; j++) {
698       row  = irow_i[j];
699 #if defined (PETSC_USE_CTABLE)
700       ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr);
701 #else
702       proc = rtable[row];
703 #endif
704       w4[proc]++;
705     }
706     for (j=0; j<size; j++) {
707       if (w4[j]) { w1[j] += w4[j];  w3[j]++;}
708     }
709   }
710 
711   nrqs     = 0;              /* no of outgoing messages */
712   msz      = 0;              /* total mesg length for all procs */
713   w1[rank] = 0;              /* no mesg sent to itself */
714   w3[rank] = 0;
715   for (i=0; i<size; i++) {
716     if (w1[i])  { w2[i] = 1; nrqs++;} /* there exists a message to proc i */
717   }
718   ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt),&pa);CHKERRQ(ierr); /*(proc -array)*/
719   for (i=0,j=0; i<size; i++) {
720     if (w1[i]) { pa[j] = i; j++; }
721   }
722 
723   /* Each message would have a header = 1 + 2*(no of IS) + data */
724   for (i=0; i<nrqs; i++) {
725     j     = pa[i];
726     w1[j] += w2[j] + 2* w3[j];
727     msz   += w1[j];
728   }
729 
730   /* Determine the number of messages to expect, their lengths, and from-ids */
731   ierr = PetscGatherNumberOfMessages(comm,w2,w1,&nrqr);CHKERRQ(ierr);
732   ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,w1,&onodes1,&olengths1);CHKERRQ(ierr);
733 
734   /* Now post the Irecvs corresponding to these messages */
735   ierr = PetscPostIrecvInt(comm,tag0,nrqr,onodes1,olengths1,&rbuf1,&r_waits1);CHKERRQ(ierr);
736 
737   ierr = PetscFree(onodes1);CHKERRQ(ierr);
738   ierr = PetscFree(olengths1);CHKERRQ(ierr);
739 
740   /* Allocate Memory for outgoing messages */
741   ierr = PetscMalloc4(size,PetscInt*,&sbuf1,size,PetscInt*,&ptr,2*msz,PetscInt,&tmp,size,PetscInt,&ctr);CHKERRQ(ierr);
742   ierr = PetscMemzero(sbuf1,size*sizeof(PetscInt*));CHKERRQ(ierr);
743   ierr = PetscMemzero(ptr,size*sizeof(PetscInt*));CHKERRQ(ierr);
744   {
745     PetscInt *iptr = tmp,ict = 0;
746     for (i=0; i<nrqs; i++) {
747       j         = pa[i];
748       iptr     += ict;
749       sbuf1[j]  = iptr;
750       ict       = w1[j];
751     }
752   }
753 
754   /* Form the outgoing messages */
755   /* Initialise the header space */
756   for (i=0; i<nrqs; i++) {
757     j           = pa[i];
758     sbuf1[j][0] = 0;
759     ierr        = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
760     ptr[j]      = sbuf1[j] + 2*w3[j] + 1;
761   }
762 
763   /* Parse the isrow and copy data into outbuf */
764   for (i=0; i<ismax; i++) {
765     ierr   = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
766     irow_i = irow[i];
767     jmax   = nrow[i];
768     for (j=0; j<jmax; j++) {  /* parse the indices of each IS */
769       row  = irow_i[j];
770 #if defined (PETSC_USE_CTABLE)
771       ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr);
772 #else
773       proc = rtable[row];
774 #endif
775       if (proc != rank) { /* copy to the outgoing buf*/
776         ctr[proc]++;
777         *ptr[proc] = row;
778         ptr[proc]++;
779       }
780     }
781     /* Update the headers for the current IS */
782     for (j=0; j<size; j++) { /* Can Optimise this loop too */
783       if ((ctr_j = ctr[j])) {
784         sbuf1_j        = sbuf1[j];
785         k              = ++sbuf1_j[0];
786         sbuf1_j[2*k]   = ctr_j;
787         sbuf1_j[2*k-1] = i;
788       }
789     }
790   }
791 
792   /*  Now  post the sends */
793   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
794   for (i=0; i<nrqs; ++i) {
795     j = pa[i];
796     ierr = MPI_Isend(sbuf1[j],w1[j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
797   }
798 
799   /* Post receives to capture the buffer size */
800   ierr     = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
801   ierr     = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf2);CHKERRQ(ierr);
802   rbuf2[0] = tmp + msz;
803   for (i=1; i<nrqs; ++i) {
804     j        = pa[i];
805     rbuf2[i] = rbuf2[i-1]+w1[pa[i-1]];
806   }
807   for (i=0; i<nrqs; ++i) {
808     j    = pa[i];
809     ierr = MPI_Irecv(rbuf2[i],w1[j],MPIU_INT,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
810   }
811 
812   /* Send to other procs the buf size they should allocate */
813 
814   /* Receive messages*/
815   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);
816   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&r_status1);CHKERRQ(ierr);
817   ierr = PetscMalloc3(nrqr+1,PetscInt*,&sbuf2,nrqr,PetscInt,&req_size,nrqr,PetscInt,&req_source);CHKERRQ(ierr);
818   {
819     Mat_SeqBAIJ *sA = (Mat_SeqBAIJ*)c->A->data,*sB = (Mat_SeqBAIJ*)c->B->data;
820     PetscInt    *sAi = sA->i,*sBi = sB->i,id,*sbuf2_i;
821 
822     for (i=0; i<nrqr; ++i) {
823       ierr = MPI_Waitany(nrqr,r_waits1,&idex,r_status1+i);CHKERRQ(ierr);
824       req_size[idex] = 0;
825       rbuf1_i         = rbuf1[idex];
826       start           = 2*rbuf1_i[0] + 1;
827       ierr            = MPI_Get_count(r_status1+i,MPIU_INT,&end);CHKERRQ(ierr);
828       ierr            = PetscMalloc(end*sizeof(PetscInt),&sbuf2[idex]);CHKERRQ(ierr);
829       sbuf2_i         = sbuf2[idex];
830       for (j=start; j<end; j++) {
831         id               = rbuf1_i[j] - rstart;
832         ncols            = sAi[id+1] - sAi[id] + sBi[id+1] - sBi[id];
833         sbuf2_i[j]       = ncols;
834         req_size[idex] += ncols;
835       }
836       req_source[idex] = r_status1[i].MPI_SOURCE;
837       /* form the header */
838       sbuf2_i[0]   = req_size[idex];
839       for (j=1; j<start; j++) { sbuf2_i[j] = rbuf1_i[j]; }
840       ierr = MPI_Isend(sbuf2_i,end,MPIU_INT,req_source[idex],tag1,comm,s_waits2+i);CHKERRQ(ierr);
841     }
842   }
843   ierr = PetscFree(r_status1);CHKERRQ(ierr);
844   ierr = PetscFree(r_waits1);CHKERRQ(ierr);
845 
846   /*  recv buffer sizes */
847   /* Receive messages*/
848 
849   ierr = PetscMalloc((nrqs+1)*sizeof(PetscInt*),&rbuf3);CHKERRQ(ierr);
850   ierr = PetscMalloc((nrqs+1)*sizeof(MatScalar*),&rbuf4);CHKERRQ(ierr);
851   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits3);CHKERRQ(ierr);
852   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits4);CHKERRQ(ierr);
853   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status2);CHKERRQ(ierr);
854 
855   for (i=0; i<nrqs; ++i) {
856     ierr = MPI_Waitany(nrqs,r_waits2,&idex,r_status2+i);CHKERRQ(ierr);
857     ierr = PetscMalloc(rbuf2[idex][0]*sizeof(PetscInt),&rbuf3[idex]);CHKERRQ(ierr);
858     ierr = PetscMalloc(rbuf2[idex][0]*bs2*sizeof(MatScalar),&rbuf4[idex]);CHKERRQ(ierr);
859     ierr = MPI_Irecv(rbuf3[idex],rbuf2[idex][0],MPIU_INT,r_status2[i].MPI_SOURCE,tag2,comm,r_waits3+idex);CHKERRQ(ierr);
860     ierr = MPI_Irecv(rbuf4[idex],rbuf2[idex][0]*bs2,MPIU_MATSCALAR,r_status2[i].MPI_SOURCE,tag3,comm,r_waits4+idex);CHKERRQ(ierr);
861   }
862   ierr = PetscFree(r_status2);CHKERRQ(ierr);
863   ierr = PetscFree(r_waits2);CHKERRQ(ierr);
864 
865   /* Wait on sends1 and sends2 */
866   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&s_status1);CHKERRQ(ierr);
867   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status2);CHKERRQ(ierr);
868 
869   if (nrqs) {ierr = MPI_Waitall(nrqs,s_waits1,s_status1);CHKERRQ(ierr);}
870   if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits2,s_status2);CHKERRQ(ierr);}
871   ierr = PetscFree(s_status1);CHKERRQ(ierr);
872   ierr = PetscFree(s_status2);CHKERRQ(ierr);
873   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
874   ierr = PetscFree(s_waits2);CHKERRQ(ierr);
875 
876   /* Now allocate buffers for a->j, and send them off */
877   ierr = PetscMalloc((nrqr+1)*sizeof(PetscInt*),&sbuf_aj);CHKERRQ(ierr);
878   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
879   ierr = PetscMalloc((j+1)*sizeof(PetscInt),&sbuf_aj[0]);CHKERRQ(ierr);
880   for (i=1; i<nrqr; i++)  sbuf_aj[i] = sbuf_aj[i-1] + req_size[i-1];
881 
882   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits3);CHKERRQ(ierr);
883   {
884      for (i=0; i<nrqr; i++) {
885       rbuf1_i   = rbuf1[i];
886       sbuf_aj_i = sbuf_aj[i];
887       ct1       = 2*rbuf1_i[0] + 1;
888       ct2       = 0;
889       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
890         kmax = rbuf1[i][2*j];
891         for (k=0; k<kmax; k++,ct1++) {
892           row    = rbuf1_i[ct1] - rstart;
893           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
894           ncols  = nzA + nzB;
895           cworkA = a_j + a_i[row]; cworkB = b_j + b_i[row];
896 
897           /* load the column indices for this row into cols*/
898           cols  = sbuf_aj_i + ct2;
899           for (l=0; l<nzB; l++) {
900             if ((ctmp = bmap[cworkB[l]]) < cstart)  cols[l] = ctmp;
901             else break;
902           }
903           imark = l;
904           for (l=0; l<nzA; l++)   cols[imark+l] = cstart + cworkA[l];
905           for (l=imark; l<nzB; l++) cols[nzA+l] = bmap[cworkB[l]];
906           ct2 += ncols;
907         }
908       }
909       ierr = MPI_Isend(sbuf_aj_i,req_size[i],MPIU_INT,req_source[i],tag2,comm,s_waits3+i);CHKERRQ(ierr);
910     }
911   }
912   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status3);CHKERRQ(ierr);
913   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status3);CHKERRQ(ierr);
914 
915   /* Allocate buffers for a->a, and send them off */
916   ierr = PetscMalloc((nrqr+1)*sizeof(MatScalar *),&sbuf_aa);CHKERRQ(ierr);
917   for (i=0,j=0; i<nrqr; i++) j += req_size[i];
918   ierr = PetscMalloc((j+1)*bs2*sizeof(MatScalar),&sbuf_aa[0]);CHKERRQ(ierr);
919   for (i=1; i<nrqr; i++)  sbuf_aa[i] = sbuf_aa[i-1] + req_size[i-1]*bs2;
920 
921   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits4);CHKERRQ(ierr);
922   {
923     for (i=0; i<nrqr; i++) {
924       rbuf1_i   = rbuf1[i];
925       sbuf_aa_i = sbuf_aa[i];
926       ct1       = 2*rbuf1_i[0]+1;
927       ct2       = 0;
928       for (j=1,max1=rbuf1_i[0]; j<=max1; j++) {
929         kmax = rbuf1_i[2*j];
930         for (k=0; k<kmax; k++,ct1++) {
931           row    = rbuf1_i[ct1] - rstart;
932           nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
933           ncols  = nzA + nzB;
934           cworkA = a_j + a_i[row];     cworkB = b_j + b_i[row];
935           vworkA = a_a + a_i[row]*bs2; vworkB = b_a + b_i[row]*bs2;
936 
937           /* load the column values for this row into vals*/
938           vals  = sbuf_aa_i+ct2*bs2;
939           for (l=0; l<nzB; l++) {
940             if ((bmap[cworkB[l]]) < cstart) {
941               ierr = PetscMemcpy(vals+l*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
942             }
943             else break;
944           }
945           imark = l;
946           for (l=0; l<nzA; l++) {
947             ierr = PetscMemcpy(vals+(imark+l)*bs2,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
948           }
949           for (l=imark; l<nzB; l++) {
950             ierr = PetscMemcpy(vals+(nzA+l)*bs2,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
951           }
952           ct2 += ncols;
953         }
954       }
955       ierr = MPI_Isend(sbuf_aa_i,req_size[i]*bs2,MPIU_MATSCALAR,req_source[i],tag3,comm,s_waits4+i);CHKERRQ(ierr);
956     }
957   }
958   ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Status),&r_status4);CHKERRQ(ierr);
959   ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Status),&s_status4);CHKERRQ(ierr);
960   ierr = PetscFree(rbuf1[0]);CHKERRQ(ierr);
961   ierr = PetscFree(rbuf1);CHKERRQ(ierr);
962 
963   /* Form the matrix */
964   /* create col map */
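  /* Note on the maps built below (describing the existing code): for each
     submatrix, global block column g maps to local column j via cmap_i[g] = j+1
     (or a PetscTable entry g+1 -> j+1 when PETSC_USE_CTABLE is defined); a
     lookup result of 0 therefore means "column not selected" and tcol-1
     recovers the local index. */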
965   {
966     const PetscInt *icol_i;
967 #if defined (PETSC_USE_CTABLE)
968     /* Create row map*/
969     ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&colmaps);CHKERRQ(ierr);
970     for (i=0; i<ismax; i++) {
971       ierr = PetscTableCreate(ncol[i]+1,&colmaps[i]);CHKERRQ(ierr);
972     }
973 #else
974     ierr    = PetscMalloc(ismax*sizeof(PetscInt*),&cmap);CHKERRQ(ierr);
975     ierr    = PetscMalloc(ismax*c->Nbs*sizeof(PetscInt),&cmap[0]);CHKERRQ(ierr);
976     for (i=1; i<ismax; i++) { cmap[i] = cmap[i-1] + c->Nbs; }
977 #endif
978     for (i=0; i<ismax; i++) {
979       jmax   = ncol[i];
980       icol_i = icol[i];
981 #if defined (PETSC_USE_CTABLE)
982       lcol1_gcol1 = colmaps[i];
983       for (j=0; j<jmax; j++) {
984 	ierr = PetscTableAdd(lcol1_gcol1,icol_i[j]+1,j+1);CHKERRQ(ierr);
985       }
986 #else
987       cmap_i = cmap[i];
988       for (j=0; j<jmax; j++) {
989         cmap_i[icol_i[j]] = j+1;
990       }
991 #endif
992     }
993   }
994 
995   /* Create lens which is required for MatCreate... */
996   for (i=0,j=0; i<ismax; i++) { j += nrow[i]; }
997   ierr    = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ j*sizeof(PetscInt),&lens);CHKERRQ(ierr);
998   lens[0] = (PetscInt*)(lens + ismax);
999   ierr    = PetscMemzero(lens[0],j*sizeof(PetscInt));CHKERRQ(ierr);
1000   for (i=1; i<ismax; i++) { lens[i] = lens[i-1] + nrow[i-1]; }
1001 
1002   /* Update lens from local data */
1003   for (i=0; i<ismax; i++) {
1004     jmax   = nrow[i];
1005 #if defined (PETSC_USE_CTABLE)
1006     lcol1_gcol1 = colmaps[i];
1007 #else
1008     cmap_i = cmap[i];
1009 #endif
1010     irow_i = irow[i];
1011     lens_i = lens[i];
1012     for (j=0; j<jmax; j++) {
1013       row  = irow_i[j];
1014 #if defined (PETSC_USE_CTABLE)
1015       ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr);
1016 #else
1017       proc = rtable[row];
1018 #endif
1019       if (proc == rank) {
1020         /* Get indices from matA and then from matB */
1021         row    = row - rstart;
1022         nzA    = a_i[row+1] - a_i[row];     nzB = b_i[row+1] - b_i[row];
1023         cworkA =  a_j + a_i[row]; cworkB = b_j + b_i[row];
1024 #if defined (PETSC_USE_CTABLE)
1025        for (k=0; k<nzA; k++) {
1026 	 ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[k]+1,&tt);CHKERRQ(ierr);
1027           if (tt) { lens_i[j]++; }
1028         }
1029         for (k=0; k<nzB; k++) {
1030 	  ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[k]]+1,&tt);CHKERRQ(ierr);
1031           if (tt) { lens_i[j]++; }
1032         }
1033 #else
1034         for (k=0; k<nzA; k++) {
1035           if (cmap_i[cstart + cworkA[k]]) { lens_i[j]++; }
1036         }
1037         for (k=0; k<nzB; k++) {
1038           if (cmap_i[bmap[cworkB[k]]]) { lens_i[j]++; }
1039         }
1040 #endif
1041       }
1042     }
1043   }
1044 #if defined (PETSC_USE_CTABLE)
1045   /* Create row map*/
1046   ierr = PetscMalloc((1+ismax)*sizeof(PetscTable),&rowmaps);CHKERRQ(ierr);
1047   for (i=0; i<ismax; i++){
1048     ierr = PetscTableCreate(nrow[i]+1,&rowmaps[i]);CHKERRQ(ierr);
1049   }
1050 #else
1051   /* Create row map*/
1052   ierr    = PetscMalloc((1+ismax)*sizeof(PetscInt*)+ ismax*Mbs*sizeof(PetscInt),&rmap);CHKERRQ(ierr);
1053   rmap[0] = (PetscInt*)(rmap + ismax);
1054   ierr    = PetscMemzero(rmap[0],ismax*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
1055   for (i=1; i<ismax; i++) { rmap[i] = rmap[i-1] + Mbs;}
1056 #endif
1057   for (i=0; i<ismax; i++) {
1058     irow_i = irow[i];
1059     jmax   = nrow[i];
1060 #if defined (PETSC_USE_CTABLE)
1061     lrow1_grow1 = rowmaps[i];
1062     for (j=0; j<jmax; j++) {
1063       ierr = PetscTableAdd(lrow1_grow1,irow_i[j]+1,j+1);CHKERRQ(ierr);
1064     }
1065 #else
1066     rmap_i = rmap[i];
1067     for (j=0; j<jmax; j++) {
1068       rmap_i[irow_i[j]] = j;
1069     }
1070 #endif
1071   }
1072 
1073   /* Update lens from offproc data */
1074   {
1075     PetscInt    *rbuf2_i,*rbuf3_i,*sbuf1_i;
1076     PetscMPIInt ii;
1077 
1078     for (tmp2=0; tmp2<nrqs; tmp2++) {
1079       ierr    = MPI_Waitany(nrqs,r_waits3,&ii,r_status3+tmp2);CHKERRQ(ierr);
1080       idex   = pa[ii];
1081       sbuf1_i = sbuf1[idex];
1082       jmax    = sbuf1_i[0];
1083       ct1     = 2*jmax+1;
1084       ct2     = 0;
1085       rbuf2_i = rbuf2[ii];
1086       rbuf3_i = rbuf3[ii];
1087       for (j=1; j<=jmax; j++) {
1088         is_no   = sbuf1_i[2*j-1];
1089         max1    = sbuf1_i[2*j];
1090         lens_i  = lens[is_no];
1091 #if defined (PETSC_USE_CTABLE)
1092 	lcol1_gcol1 = colmaps[is_no];
1093 	lrow1_grow1 = rowmaps[is_no];
1094 #else
1095         cmap_i  = cmap[is_no];
1096 	rmap_i  = rmap[is_no];
1097 #endif
1098         for (k=0; k<max1; k++,ct1++) {
1099 #if defined (PETSC_USE_CTABLE)
1100 	  ierr = PetscTableFind(lrow1_grow1,sbuf1_i[ct1]+1,&row);CHKERRQ(ierr);
1101           row--;
1102           if (row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); }
1103 #else
1104           row  = rmap_i[sbuf1_i[ct1]]; /* the val in the new matrix to be */
1105 #endif
1106           max2 = rbuf2_i[ct1];
1107           for (l=0; l<max2; l++,ct2++) {
1108 #if defined (PETSC_USE_CTABLE)
1109 	    ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tt);CHKERRQ(ierr);
1110 	    if (tt) {
1111               lens_i[row]++;
1112             }
1113 #else
1114             if (cmap_i[rbuf3_i[ct2]]) {
1115               lens_i[row]++;
1116             }
1117 #endif
1118           }
1119         }
1120       }
1121     }
1122   }
1123   ierr = PetscFree(r_status3);CHKERRQ(ierr);
1124   ierr = PetscFree(r_waits3);CHKERRQ(ierr);
1125   if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits3,s_status3);CHKERRQ(ierr);}
1126   ierr = PetscFree(s_status3);CHKERRQ(ierr);
1127   ierr = PetscFree(s_waits3);CHKERRQ(ierr);
1128 
1129   /* Create the submatrices */
1130   if (scall == MAT_REUSE_MATRIX) {
1131     /*
1132         Assumes new rows are the same length as the old rows, hence a bug!
1133     */
1134     for (i=0; i<ismax; i++) {
1135       mat = (Mat_SeqBAIJ *)(submats[i]->data);
1136       if ((mat->mbs != nrow[i]) || (mat->nbs != ncol[i] || C->rmap->bs != bs)) {
1137         SETERRQ(PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. wrong size");
1138       }
1139       ierr = PetscMemcmp(mat->ilen,lens[i],mat->mbs *sizeof(PetscInt),&flag);CHKERRQ(ierr);
1140       if (!flag) {
1141         SETERRQ(PETSC_ERR_ARG_INCOMP,"Cannot reuse matrix. wrong no of nonzeros");
1142       }
1143       /* Initialize matrix as if empty */
1144       ierr = PetscMemzero(mat->ilen,mat->mbs*sizeof(PetscInt));CHKERRQ(ierr);
1145       submats[i]->factor = C->factor;
1146     }
1147   } else {
1148     for (i=0; i<ismax; i++) {
1149       ierr = MatCreate(PETSC_COMM_SELF,submats+i);CHKERRQ(ierr);
1150       ierr = MatSetSizes(submats[i],nrow[i]*bs,ncol[i]*bs,nrow[i]*bs,ncol[i]*bs);CHKERRQ(ierr);
1151       ierr = MatSetType(submats[i],((PetscObject)A)->type_name);CHKERRQ(ierr);
1152       ierr = MatSeqBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr);
1153       ierr = MatSeqSBAIJSetPreallocation(submats[i],C->rmap->bs,0,lens[i]);CHKERRQ(ierr);
1154     }
1155   }
1156 
1157   /* Assemble the matrices */
1158   /* First assemble the local rows */
1159   {
1160     PetscInt       ilen_row,*imat_ilen,*imat_j,*imat_i;
1161     MatScalar *imat_a;
1162 
1163     for (i=0; i<ismax; i++) {
1164       mat       = (Mat_SeqBAIJ*)submats[i]->data;
1165       imat_ilen = mat->ilen;
1166       imat_j    = mat->j;
1167       imat_i    = mat->i;
1168       imat_a    = mat->a;
1169 
1170 #if defined (PETSC_USE_CTABLE)
1171       lcol1_gcol1 = colmaps[i];
1172       lrow1_grow1 = rowmaps[i];
1173 #else
1174       cmap_i    = cmap[i];
1175       rmap_i    = rmap[i];
1176 #endif
1177       irow_i    = irow[i];
1178       jmax      = nrow[i];
1179       for (j=0; j<jmax; j++) {
1180         row      = irow_i[j];
1181 #if defined (PETSC_USE_CTABLE)
1182 	ierr = PetscGetProc(row,size,c->rangebs,&proc);CHKERRQ(ierr);
1183 #else
1184 	proc = rtable[row];
1185 #endif
1186         if (proc == rank) {
1187           row      = row - rstart;
1188           nzA      = a_i[row+1] - a_i[row];
1189           nzB      = b_i[row+1] - b_i[row];
1190           cworkA   = a_j + a_i[row];
1191           cworkB   = b_j + b_i[row];
1192           vworkA   = a_a + a_i[row]*bs2;
1193           vworkB   = b_a + b_i[row]*bs2;
1194 #if defined (PETSC_USE_CTABLE)
1195 	  ierr = PetscTableFind(lrow1_grow1,row+rstart+1,&row);CHKERRQ(ierr);
1196           row--;
1197           if (row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); }
1198 #else
1199           row      = rmap_i[row + rstart];
1200 #endif
1201           mat_i    = imat_i[row];
1202           mat_a    = imat_a + mat_i*bs2;
1203           mat_j    = imat_j + mat_i;
1204           ilen_row = imat_ilen[row];
1205 
1206           /* load the column indices for this row into cols*/
1207           for (l=0; l<nzB; l++) {
1208 	    if ((ctmp = bmap[cworkB[l]]) < cstart) {
1209 #if defined (PETSC_USE_CTABLE)
1210 	      ierr = PetscTableFind(lcol1_gcol1,ctmp+1,&tcol);CHKERRQ(ierr);
1211 	      if (tcol) {
1212 #else
1213               if ((tcol = cmap_i[ctmp])) {
1214 #endif
1215                 *mat_j++ = tcol - 1;
1216                 ierr     = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1217                 mat_a   += bs2;
1218                 ilen_row++;
1219               }
1220             } else break;
1221           }
1222           imark = l;
1223           for (l=0; l<nzA; l++) {
1224 #if defined (PETSC_USE_CTABLE)
1225 	    ierr = PetscTableFind(lcol1_gcol1,cstart+cworkA[l]+1,&tcol);CHKERRQ(ierr);
1226 	    if (tcol) {
1227 #else
1228 	    if ((tcol = cmap_i[cstart + cworkA[l]])) {
1229 #endif
1230               *mat_j++ = tcol - 1;
1231               ierr     = PetscMemcpy(mat_a,vworkA+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1232               mat_a   += bs2;
1233               ilen_row++;
1234             }
1235           }
1236           for (l=imark; l<nzB; l++) {
1237 #if defined (PETSC_USE_CTABLE)
1238 	    ierr = PetscTableFind(lcol1_gcol1,bmap[cworkB[l]]+1,&tcol);CHKERRQ(ierr);
1239 	    if (tcol) {
1240 #else
1241             if ((tcol = cmap_i[bmap[cworkB[l]]])) {
1242 #endif
1243               *mat_j++ = tcol - 1;
1244               ierr     = PetscMemcpy(mat_a,vworkB+l*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1245               mat_a   += bs2;
1246               ilen_row++;
1247             }
1248           }
1249           imat_ilen[row] = ilen_row;
1250         }
1251       }
1252 
1253     }
1254   }
1255 
1256   /*   Now assemble the off proc rows*/
1257   {
1258     PetscInt    *sbuf1_i,*rbuf2_i,*rbuf3_i,*imat_ilen,ilen;
1259     PetscInt    *imat_j,*imat_i;
1260     MatScalar   *imat_a,*rbuf4_i;
1261     PetscMPIInt ii;
1262 
1263     for (tmp2=0; tmp2<nrqs; tmp2++) {
1264       ierr    = MPI_Waitany(nrqs,r_waits4,&ii,r_status4+tmp2);CHKERRQ(ierr);
1265       idex   = pa[ii];
1266       sbuf1_i = sbuf1[idex];
1267       jmax    = sbuf1_i[0];
1268       ct1     = 2*jmax + 1;
1269       ct2     = 0;
1270       rbuf2_i = rbuf2[ii];
1271       rbuf3_i = rbuf3[ii];
1272       rbuf4_i = rbuf4[ii];
1273       for (j=1; j<=jmax; j++) {
1274         is_no     = sbuf1_i[2*j-1];
1275 #if defined (PETSC_USE_CTABLE)
1276 	lrow1_grow1 = rowmaps[is_no];
1277 	lcol1_gcol1 = colmaps[is_no];
1278 #else
1279         rmap_i    = rmap[is_no];
1280         cmap_i    = cmap[is_no];
1281 #endif
1282         mat       = (Mat_SeqBAIJ*)submats[is_no]->data;
1283         imat_ilen = mat->ilen;
1284         imat_j    = mat->j;
1285         imat_i    = mat->i;
1286         imat_a    = mat->a;
1287         max1      = sbuf1_i[2*j];
1288         for (k=0; k<max1; k++,ct1++) {
1289           row   = sbuf1_i[ct1];
1290 #if defined (PETSC_USE_CTABLE)
1291 	  ierr = PetscTableFind(lrow1_grow1,row+1,&row);CHKERRQ(ierr);
1292           row--;
1293           if(row < 0) { SETERRQ(PETSC_ERR_PLIB,"row not found in table"); }
1294 #else
1295           row   = rmap_i[row];
1296 #endif
1297           ilen  = imat_ilen[row];
1298           mat_i = imat_i[row];
1299           mat_a = imat_a + mat_i*bs2;
1300           mat_j = imat_j + mat_i;
1301           max2 = rbuf2_i[ct1];
1302           for (l=0; l<max2; l++,ct2++) {
1303 #if defined (PETSC_USE_CTABLE)
1304 	    ierr = PetscTableFind(lcol1_gcol1,rbuf3_i[ct2]+1,&tcol);CHKERRQ(ierr);
1305 	    if (tcol) {
1306 #else
1307 	    if ((tcol = cmap_i[rbuf3_i[ct2]])) {
1308 #endif
1309               *mat_j++    = tcol - 1;
1310               /* *mat_a++= rbuf4_i[ct2]; */
1311               ierr        = PetscMemcpy(mat_a,rbuf4_i+ct2*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1312               mat_a      += bs2;
1313               ilen++;
1314             }
1315           }
1316           imat_ilen[row] = ilen;
1317         }
1318       }
1319     }
1320   }
1321   ierr = PetscFree(r_status4);CHKERRQ(ierr);
1322   ierr = PetscFree(r_waits4);CHKERRQ(ierr);
1323   if (nrqr) {ierr = MPI_Waitall(nrqr,s_waits4,s_status4);CHKERRQ(ierr);}
1324   ierr = PetscFree(s_waits4);CHKERRQ(ierr);
1325   ierr = PetscFree(s_status4);CHKERRQ(ierr);
1326 
1327   /* Restore the indices */
1328   for (i=0; i<ismax; i++) {
1329     ierr = ISRestoreIndices(isrow[i],irow+i);CHKERRQ(ierr);
1330     ierr = ISRestoreIndices(iscol[i],icol+i);CHKERRQ(ierr);
1331   }
1332 
1333   /* Destroy allocated memory */
1334 #if defined(PETSC_USE_CTABLE)
1335   ierr = PetscFree4(irow,icol,nrow,ncol);CHKERRQ(ierr);
1336 #else
1337   ierr = PetscFree5(irow,icol,nrow,ncol,rtable);CHKERRQ(ierr);
1338 #endif
1339   ierr = PetscFree4(w1,w2,w3,w4);CHKERRQ(ierr);
1340   ierr = PetscFree(pa);CHKERRQ(ierr);
1341 
1342   ierr = PetscFree4(sbuf1,ptr,tmp,ctr);CHKERRQ(ierr);
1343   ierr = PetscFree(sbuf1);CHKERRQ(ierr);
1344   ierr = PetscFree(rbuf2);CHKERRQ(ierr);
1345   for (i=0; i<nrqr; ++i) {
1346     ierr = PetscFree(sbuf2[i]);CHKERRQ(ierr);
1347   }
1348   for (i=0; i<nrqs; ++i) {
1349     ierr = PetscFree(rbuf3[i]);CHKERRQ(ierr);
1350     ierr = PetscFree(rbuf4[i]);CHKERRQ(ierr);
1351   }
1352   ierr = PetscFree3(sbuf2,req_size,req_source);CHKERRQ(ierr);
1353   ierr = PetscFree(rbuf3);CHKERRQ(ierr);
1354   ierr = PetscFree(rbuf4);CHKERRQ(ierr);
1355   ierr = PetscFree(sbuf_aj[0]);CHKERRQ(ierr);
1356   ierr = PetscFree(sbuf_aj);CHKERRQ(ierr);
1357   ierr = PetscFree(sbuf_aa[0]);CHKERRQ(ierr);
1358   ierr = PetscFree(sbuf_aa);CHKERRQ(ierr);
1359 
1360 #if defined (PETSC_USE_CTABLE)
1361   for (i=0; i<ismax; i++){
1362     ierr = PetscTableDestroy(rowmaps[i]);CHKERRQ(ierr);
1363     ierr = PetscTableDestroy(colmaps[i]);CHKERRQ(ierr);
1364   }
1365   ierr = PetscFree(colmaps);CHKERRQ(ierr);
1366   ierr = PetscFree(rowmaps);CHKERRQ(ierr);
1367 #else
1368   ierr = PetscFree(rmap);CHKERRQ(ierr);
1369   ierr = PetscFree(cmap[0]);CHKERRQ(ierr);
1370   ierr = PetscFree(cmap);CHKERRQ(ierr);
1371 #endif
1372   ierr = PetscFree(lens);CHKERRQ(ierr);
1373 
1374   for (i=0; i<ismax; i++) {
1375     ierr = MatAssemblyBegin(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1376     ierr = MatAssemblyEnd(submats[i],MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1377   }
1378   PetscFunctionReturn(0);
1379 }
1380 
1381