xref: /petsc/src/mat/impls/sbaij/mpi/sbaijov.c (revision 5483b11dfd67de01cae3704bdb6f9c5d29f1b185)
1 /*$Id: sbaijov.c,v 1.65 2001/08/06 21:15:42 bsmith Exp $*/
2 
3 /*
4    Routines to compute overlapping regions of a parallel MPI matrix.
5    Used for finding submatrices that were shared across processors.
6 */
7 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
8 #include "petscbt.h"
9 
10 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat,int,IS *);
11 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat,int *,int,int **,PetscBT*);
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ"
15 int MatIncreaseOverlap_MPISBAIJ(Mat C,int is_max,IS is[],int ov)
16 {
17   Mat_MPISBAIJ  *c = (Mat_MPISBAIJ*)C->data;
18   int           i,ierr,N=C->N, bs=c->bs;
19   IS            *is_new;
20 
21   PetscFunctionBegin;
22   ierr = PetscMalloc(is_max*sizeof(IS),&is_new);CHKERRQ(ierr);
23   /* Convert the indices into block format */
24   ierr = ISCompressIndicesGeneral(N,bs,is_max,is,is_new);CHKERRQ(ierr);
25   if (ov < 0){ SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative overlap specified\n");}
26   for (i=0; i<ov; ++i) {
27     ierr = MatIncreaseOverlap_MPISBAIJ_Once(C,is_max,is_new);CHKERRQ(ierr);
28   }
29   for (i=0; i<is_max; i++) {ierr = ISDestroy(is[i]);CHKERRQ(ierr);}
30   ierr = ISExpandIndicesGeneral(N,bs,is_max,is_new,is);CHKERRQ(ierr);
31   for (i=0; i<is_max; i++) {ierr = ISDestroy(is_new[i]);CHKERRQ(ierr);}
32   ierr = PetscFree(is_new);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 typedef enum {MINE,OTHER} WhoseOwner;
37 /*  data1, odata1 and odata2 are packed in the format (for communication):
38        data[0]          = is_max, no of is
39        data[1]          = size of is[0]
40         ...
41        data[is_max]     = size of is[is_max-1]
42        data[is_max + 1] = data(is[0])
43         ...
44        data[is_max+1+sum(size of is[k]), k=0,...,i-1] = data(is[i])
45         ...
46    data2 is packed in the format (for creating output is[]):
47        data[0]          = is_max, no of is
48        data[1]          = size of is[0]
49         ...
50        data[is_max]     = size of is[is_max-1]
51        data[is_max + 1] = data(is[0])
52         ...
53        data[is_max + 1 + Mbs*i) = data(is[i])
54         ...
55 */
56 #undef USEBcol /* not working yet! e.g., mpirun -np 3 ex92 -mat_size 100 -nd 1 (02/24/2004) */
57 #undef __FUNCT__
58 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Once"
59 static int MatIncreaseOverlap_MPISBAIJ_Once(Mat C,int is_max,IS is[])
60 {
61   Mat_MPISBAIJ  *c = (Mat_MPISBAIJ*)C->data;
62   int         len,idx,*idx_i,isz,col,*n,*data1,**data1_start,*data2,*data2_i,*data,*data_i,
63               size,rank,Mbs,i,j,k,ierr,nrqs,*odata1,*odata2,
64               tag1,tag2,flag,proc_id,**odata2_ptr,waits_idx,*ctable=0,*btable,len_b;
65   int         nrqr,*id_r1,*len_r1,proc_end,*iwork,*len_s,*data1_i;
66   char        *t_p;
67   MPI_Comm    comm;
68   MPI_Request *s_waits1,*s_waits2,r_req,*r_waits1;
69   MPI_Status  *s_status,r_status;
70   PetscBT     *table=0;  /* mark indices of this processor's is[] */
71   PetscBT     table_i;
72   PetscBT     otable; /* mark indices of other processors' is[] */
73 
74   PetscFunctionBegin;
75 
76   comm = C->comm;
77   size = c->size;
78   rank = c->rank;
79   Mbs  = c->Mbs;
80 
81   ierr = PetscObjectGetNewTag((PetscObject)C,&tag1);CHKERRQ(ierr);
82   ierr = PetscObjectGetNewTag((PetscObject)C,&tag2);CHKERRQ(ierr);
83 
84   /* 1. Send this processor's is[] to other processors */
85   /*---------------------------------------------------*/
86   ierr = PetscMalloc(is_max*sizeof(int),&n);CHKERRQ(ierr);
87   len = 0;
88   for (i=0; i<is_max; i++) {
89     ierr = ISGetLocalSize(is[i],&n[i]);CHKERRQ(ierr);
90     len += n[i];
91   }
92   if (len == 0) {
93     is_max = 0;
94   } else {
95     len += 1 + is_max; /* max length of data1 for one processor */
96   }
97   /* ierr = PetscPrintf(PETSC_COMM_SELF, " [%d] len=%d, is_max: %d\n",rank,len,is_max); */
98 
99   ierr   = PetscMalloc(size*3*sizeof(int),&len_s);CHKERRQ(ierr);
100   btable = len_s + size;
101   iwork  = btable + size;
102 
103   ierr = PetscMalloc((size*len+1)*sizeof(int),&data1);CHKERRQ(ierr);
104   ierr = PetscMalloc(size*sizeof(int*),&data1_start);CHKERRQ(ierr);
105   for (i=0; i<size; i++) data1_start[i] = data1 + i*len;
106 
107   if (is_max){
108     /* create hash table ctable which maps c->row to proc_id) */
109     ierr = PetscMalloc(Mbs*sizeof(int),&ctable);CHKERRQ(ierr);
110     for (proc_id=0,j=0; proc_id<size; proc_id++) {
111       for (; j<c->rowners[proc_id+1]; j++) {
112         ctable[j] = proc_id;
113       }
114     }
115 
116     /* create tables for marking indices */
117     len_b = is_max*sizeof(PetscBT) + (Mbs/PETSC_BITS_PER_BYTE+1)*is_max*sizeof(char) + 1;
118     ierr = PetscMalloc(len_b,&table);CHKERRQ(ierr);
119     t_p  = (char *)(table + is_max);
120     for (i=0; i<is_max; i++) {
121       table[i]  = t_p  + (Mbs/PETSC_BITS_PER_BYTE+1)*i;
122     }
123 
124 #ifdef USEBcol
125     Vec        vfrom_mpi,vto_seq;
126     int        bs=c->bs,Bn = c->B->n,Bnbs = Bn/bs,N,low,*Bowners;
127     PetscScalar garray[Bnbs],*array;
128 #ifdef NEW
129     IS is_local,is_gl;
130     ierr = ISCreateGeneral(comm,Bnbs,c->garray,&is_local);CHKERRQ(ierr);
131     ierr = ISAllGather(is_local, &is_gl);CHKERRQ(ierr);
132     if (rank == 1){
133       ierr = ISView(is_gl,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr);
134     }
135     ierr = ISDestroy(is_gl);
136     ierr = ISDestroy(is_local);
137 
138 #endif /* NEW */
139     /* create vfrom_mpi, a mpi vector holding local B->garray */
140     for (i=0; i<Bnbs; i++) garray[i] = c->garray[i];
141     ierr = VecCreateMPIWithArray(comm,Bnbs,PETSC_DECIDE,garray,&vfrom_mpi);CHKERRQ(ierr);
142 
143     /* create vto_seq, a seq vector for holding B->garray of all processors */
144     ierr = VecGetSize(vfrom_mpi,&N);CHKERRQ(ierr);
145     ierr = VecGetOwnershipRange(vfrom_mpi,&low,PETSC_NULL);CHKERRQ(ierr);
146 
147     ierr = PetscMalloc((size+1)*sizeof(int),&Bowners);CHKERRQ(ierr);
148     ierr = MPI_Allgather(&low,1,MPI_INT,Bowners,1,MPI_INT,comm);CHKERRQ(ierr);
149     Bowners[size] = N;
150     ierr = VecConvertMPIToSeqAll(vfrom_mpi,&vto_seq);CHKERRQ(ierr);
151 
152     if (rank == 0){
153       /* ierr = VecView(vto_seq,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); */
154       for (i = 0; i<=size; i++){
155         ierr = PetscPrintf(PETSC_COMM_SELF," Bowners[%d]=%d, N=%d\n",i,Bowners[i],N);
156       }
157     }
158     ierr = VecDestroy(vfrom_mpi);CHKERRQ(ierr);
159 #endif
160 
161 #ifdef USEBcol
162     /* hash table table_i[idx] = 1 if idx is an index of one of the is[] array
163                                = 0 otherwise */
164     table_i = table[0];
165     ierr    = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
166     for (i=0; i<is_max; i++){
167       ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr);
168       for (j=0; j<n[i]; j++){
169         idx = idx_i[j];
170         ierr = PetscBTSet(table_i,idx);CHKERRQ(ierr);
171       }
172       ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr);
173     }
174 
175     ierr = VecGetArray(vto_seq,&array);
176     for (i=0; i<size; i++){
177       btable[i] = 0;
178       for (j = Bowners[i]; j<Bowners[i+1]; j++){ /* go through B cols */
179         idx = (int)array[j];
180         if(PetscBTLookup(table_i,idx)){
181           btable[i] = 1;
182           break;
183         }
184       }
185 
186       if (!btable[i]){ /* go through A cols */
187         for (j = c->rowners[i]; j<c->rowners[i+1]; j++){
188           if(PetscBTLookup(table_i,j)){
189             btable[i] = 1;
190             break;
191           }
192         }
193       }
194     }
195     if (rank == 1){
196       for (i=0; i<size; i++) printf(" %d\n",btable[i]);
197     }
198     ierr = VecRestoreArray(vto_seq,&array);CHKERRQ(ierr);
199     ierr = PetscFree(Bowners);CHKERRQ(ierr);
200     ierr = VecDestroy(vto_seq);CHKERRQ(ierr);
201 #endif
202   } /* if (is_max) */
203 
204   /* evaluate communication - mesg to who, length, and buffer space */
205   for (i=0; i<size; i++) {
206     len_s[i] = 0;
207   }
208 
209   /* header of data1 */
210   for (proc_id=0; proc_id<size; proc_id++){
211     iwork[proc_id] = 0;
212     *data1_start[proc_id] = is_max;
213     data1_start[proc_id]++;
214     for (j=0; j<is_max; j++) {
215       if (proc_id == rank){
216         *data1_start[proc_id] = n[j];
217       } else {
218         *data1_start[proc_id] = 0;
219       }
220       data1_start[proc_id]++;
221     }
222   }
223 
224   for (i=0; i<is_max; i++) {
225     ierr = ISGetIndices(is[i],&idx_i);CHKERRQ(ierr);
226     ierr = PetscSortInt(n[i],idx_i);CHKERRQ(ierr);
227     for (j=0; j<n[i]; j++){
228       idx = idx_i[j];
229       *data1_start[rank] = idx; data1_start[rank]++; /* for local proccessing */
230       proc_end = ctable[idx];
231       for (proc_id=0;  proc_id<=proc_end; proc_id++){ /* for others to process */
232         if (proc_id != rank ){
233 #ifdef USEBcol
234           if ( (proc_id < proc_end && btable[proc_id]) || proc_id == proc_end){
235 #endif
236             *data1_start[proc_id] = idx; data1_start[proc_id]++;
237             len_s[proc_id]++;
238 #ifdef USEBcol
239           }
240 #endif
241         }
242       }
243     }
244 
245     /* update header data */
246     for (proc_id=0; proc_id<=proc_end; proc_id++){
247       if (proc_id== rank) continue;
248       *(data1 + proc_id*len + 1 + i) = len_s[proc_id] - iwork[proc_id];
249       iwork[proc_id] = len_s[proc_id] ;
250     }
251 
252     ierr = ISRestoreIndices(is[i],&idx_i);CHKERRQ(ierr);
253   } /* for (i=0; i<is_max; i++) */
254 
255   nrqs = 0; nrqr = 0;
256   for (i=0; i<size; i++){
257     data1_start[i] = data1 + i*len;
258     if (len_s[i]){
259       nrqs++;
260       len_s[i] += 1 + is_max; /* add no. of header msg */
261     }
262   }
263 
264   for (i=0; i<is_max; i++) {
265     ierr = ISDestroy(is[i]);CHKERRQ(ierr);
266   }
267   ierr = PetscFree(n);CHKERRQ(ierr);
268   if (ctable){ierr = PetscFree(ctable);CHKERRQ(ierr);}
269 
270   /* Determine the number of messages to expect, their lengths, from from-ids */
271   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&nrqr);CHKERRQ(ierr);
272   ierr = PetscGatherMessageLengths(comm,nrqs,nrqr,len_s,&id_r1,&len_r1);CHKERRQ(ierr);
273   /* ierr = PetscPrintf(PETSC_COMM_SELF, "[%d] nrqs: %d, nrqr: %d\n",rank,nrqs,nrqr); */
274 
275   /*  Now  post the sends */
276   ierr = PetscMalloc(2*size*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
277   s_waits2 = s_waits1 + size;
278   k = 0;
279   for (proc_id=0; proc_id<size; proc_id++){  /* send data1 to processor [proc_id] */
280     if (len_s[proc_id]){
281       ierr = MPI_Isend(data1_start[proc_id],len_s[proc_id],MPI_INT,proc_id,tag1,comm,s_waits1+k);CHKERRQ(ierr);
282       k++;
283     }
284   }
285   ierr = PetscFree(data1_start);CHKERRQ(ierr);
286 
287   /* 2. Do local work on this processor's is[] */
288   /*-------------------------------------------*/
289   ierr = PetscBTCreate(Mbs,otable);CHKERRQ(ierr);
290   ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,data1,MINE,&data,table);CHKERRQ(ierr);
291 
292   /* 3. Receive other's is[] and process. Then send back */
293   /*-----------------------------------------------------*/
294   len = 0;
295   for (i=0; i<nrqr; i++){
296     if (len_r1[i] > len)len = len_r1[i];
297     /* ierr = PetscPrintf(PETSC_COMM_SELF, "[%d] expect to recv len=%d from [%d]\n",rank,len_r1[i],id_r1[i]); */
298   }
299   ierr = PetscMalloc((len+1)*sizeof(int),&odata1);CHKERRQ(ierr);
300   ierr = PetscMalloc(size*sizeof(int**),&odata2_ptr);CHKERRQ(ierr);
301   k = 0;
302   while (k < nrqr){
303     /* Receive messages */
304     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag1,comm,&flag,&r_status);CHKERRQ(ierr);
305     if (flag){
306       ierr = MPI_Get_count(&r_status,MPI_INT,&len);CHKERRQ(ierr);
307       proc_id = r_status.MPI_SOURCE;
308       ierr = MPI_Irecv(odata1,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
309       ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);
310       /* ierr = PetscPrintf(PETSC_COMM_SELF, " [%d] recv %d from [%d]\n",rank,len,proc_id); */
311 
312       /*  Process messages */
313       ierr = MatIncreaseOverlap_MPISBAIJ_Local(C,odata1,OTHER,&odata2_ptr[k],&otable);CHKERRQ(ierr);
314       odata2 = odata2_ptr[k];
315       len = 1 + odata2[0];
316       for (i=0; i<odata2[0]; i++){
317         len += odata2[1 + i];
318       }
319 
320       /* Send messages back */
321       ierr = MPI_Isend(odata2,len,MPI_INT,proc_id,tag2,comm,s_waits2+k);CHKERRQ(ierr);
322       /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] send %d back to [%d] \n",rank,len,proc_id); */
323       k++;
324     }
325   }
326   ierr = PetscFree(odata1);CHKERRQ(ierr);
327 
328   /* 4. Receive work done on other processors, then merge */
329   /*--------------------------------------------------------*/
330   /* Allocate memory for incoming data */
331   len = (1+is_max*(Mbs+1));
332   ierr = PetscMalloc(len*sizeof(int),&data2);CHKERRQ(ierr);
333 
334   k = 0;
335   while (k < nrqs){
336     /* Receive messages */
337     ierr = MPI_Iprobe(MPI_ANY_SOURCE,tag2,comm,&flag,&r_status);
338     if (flag){
339       ierr = MPI_Get_count(&r_status,MPI_INT,&len);CHKERRQ(ierr);
340       proc_id = r_status.MPI_SOURCE;
341       ierr = MPI_Irecv(data2,len,MPI_INT,proc_id,r_status.MPI_TAG,comm,&r_req);CHKERRQ(ierr);
342       ierr = MPI_Wait(&r_req,&r_status);CHKERRQ(ierr);
343       /* ierr = PetscPrintf(PETSC_COMM_SELF," [%d] recv %d from [%d], data2:\n",rank,len,proc_id); */
344       if (len > 1+is_max){ /* Add data2 into data */
345         data2_i = data2 + 1 + is_max;
346         for (i=0; i<is_max; i++){
347           table_i = table[i];
348           data_i  = data + 1 + is_max + Mbs*i;
349           isz     = data[1+i];
350           for (j=0; j<data2[1+i]; j++){
351             col = data2_i[j];
352             if (!PetscBTLookupSet(table_i,col)) {data_i[isz++] = col;}
353           }
354           data[1+i] = isz;
355           if (i < is_max - 1) data2_i += data2[1+i];
356         }
357       }
358       k++;
359     }
360   }
361 
362   /* phase 1 sends are complete */
363   ierr = PetscMalloc(size*sizeof(MPI_Status),&s_status);CHKERRQ(ierr);
364   if (nrqs){
365     ierr = MPI_Waitall(nrqs,s_waits1,s_status);CHKERRQ(ierr);
366   }
367   ierr = PetscFree(data1);CHKERRQ(ierr);
368 
369   /* phase 3 sends are complete */
370   if (nrqr){
371     ierr = MPI_Waitall(nrqr,s_waits2,s_status);CHKERRQ(ierr);
372   }
373   for (k=0; k<nrqr; k++){
374     ierr = PetscFree(odata2_ptr[k]);CHKERRQ(ierr);
375   }
376   ierr = PetscFree(odata2_ptr);CHKERRQ(ierr);
377 
378   /* 5. Create new is[] */
379   /*--------------------*/
380   for (i=0; i<is_max; i++) {
381     data_i = data + 1 + is_max + Mbs*i;
382     ierr = ISCreateGeneral(PETSC_COMM_SELF,data[1+i],data_i,is+i);CHKERRQ(ierr);
383   }
384 
385   ierr = PetscFree(data2);CHKERRQ(ierr);
386   ierr = PetscFree(data);CHKERRQ(ierr);
387   ierr = PetscFree(s_waits1);CHKERRQ(ierr);
388   ierr = PetscFree(s_status);CHKERRQ(ierr);
389   if (table) {ierr = PetscFree(table);CHKERRQ(ierr);}
390   ierr = PetscBTDestroy(otable);CHKERRQ(ierr);
391 
392   ierr = PetscFree(len_s);CHKERRQ(ierr);
393   ierr = PetscFree(id_r1);CHKERRQ(ierr);
394   ierr = PetscFree(len_r1);CHKERRQ(ierr);
395 
396   PetscFunctionReturn(0);
397 }
398 
399 #undef __FUNCT__
400 #define __FUNCT__ "MatIncreaseOverlap_MPISBAIJ_Local"
401 /*
402    MatIncreaseOverlap_MPISBAIJ_Local - Called by MatIncreaseOverlap, to do
403        the work on the local processor.
404 
405      Inputs:
406       C      - MAT_MPISBAIJ;
407       data   - holds is[]. See MatIncreaseOverlap_MPISBAIJ_Once() for the format.
408       whose  - whose is[] to be processed,
409                MINE:  this processor's is[]
410                OTHER: other processor's is[]
411      Output:
412        data_new  - whose = MINE:
413                      holds input and newly found indices in the same format as data
414                    whose = OTHER:
415                      only holds the newly found indices
416        table     - table[i]: mark the indices of is[i], i=0,...,is_max. Used only in the case 'whose=MINE'.
417 */
418 static int MatIncreaseOverlap_MPISBAIJ_Local(Mat C,int *data,int whose,int **data_new,PetscBT *table)
419 {
420   Mat_MPISBAIJ *c = (Mat_MPISBAIJ*)C->data;
421   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(c->A)->data;
422   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(c->B)->data;
423   int          ierr,row,mbs,Mbs,*nidx,*nidx_i,col,isz,isz0,*ai,*aj,*bi,*bj,*garray,rstart,l;
424   int          a_start,a_end,b_start,b_end,i,j,k,is_max,*idx_i,n;
425   PetscBT      table0;  /* mark the indices of input is[] for look up */
426   PetscBT      table_i; /* poits to i-th table. When whose=OTHER, a single table is used for all is[] */
427 
428   PetscFunctionBegin;
429   Mbs = c->Mbs; mbs = a->mbs;
430   ai = a->i; aj = a->j;
431   bi = b->i; bj = b->j;
432   garray = c->garray;
433   rstart = c->rstart;
434   is_max = data[0];
435 
436   ierr = PetscBTCreate(Mbs,table0);CHKERRQ(ierr);
437 
438   ierr = PetscMalloc((1+is_max*(Mbs+1))*sizeof(int),&nidx);CHKERRQ(ierr);
439   nidx[0] = is_max;
440 
441   idx_i  = data + is_max + 1; /* ptr to input is[0] array */
442   nidx_i = nidx + is_max + 1; /* ptr to output is[0] array */
443   for (i=0; i<is_max; i++) { /* for each is */
444     isz  = 0;
445     n = data[1+i]; /* size of input is[i] */
446 
447     /* initialize table_i, set table0 */
448     if (whose == MINE){ /* process this processor's is[] */
449       table_i = table[i];
450       nidx_i  = nidx + 1+ is_max + Mbs*i;
451     } else {            /* process other processor's is[] - only use one temp table */
452       table_i = *table;
453     }
454     ierr = PetscBTMemzero(Mbs,table_i);CHKERRQ(ierr);
455     ierr = PetscBTMemzero(Mbs,table0);CHKERRQ(ierr);
456     if (n > 0) {
457       isz0 = 0;
458       for (j=0; j<n; j++){
459         col = idx_i[j];
460         if (col >= Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"index col %d >= Mbs %d",col,Mbs);
461         if(!PetscBTLookupSet(table_i,col)) {
462           ierr = PetscBTSet(table0,col);CHKERRQ(ierr);
463           if (whose == MINE) {nidx_i[isz0] = col;}
464           isz0++;
465         }
466       }
467 
468       if (whose == MINE) {isz = isz0;}
469       k = 0;  /* no. of indices from input is[i] that have been examined */
470       for (row=0; row<mbs; row++){
471         a_start = ai[row]; a_end = ai[row+1];
472         b_start = bi[row]; b_end = bi[row+1];
473         if (PetscBTLookup(table0,row+rstart)){ /* row is on input is[i]:
474            do row search: collect all col in this row */
475           for (l = a_start; l<a_end ; l++){ /* Amat */
476             col = aj[l] + rstart;
477             if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;}
478           }
479           for (l = b_start; l<b_end ; l++){ /* Bmat */
480             col = garray[bj[l]];
481             if (!PetscBTLookupSet(table_i,col)) {nidx_i[isz++] = col;}
482           }
483           k++;
484           if (k >= isz0) break; /* for (row=0; row<mbs; row++) */
485         } else { /* row is not on input is[i]:
486           do col serach: add row onto nidx_i if there is a col in nidx_i */
487           for (l = a_start; l<a_end ; l++){ /* Amat */
488             col = aj[l] + rstart;
489             if (PetscBTLookup(table0,col)){
490               if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;}
491               break; /* for l = start; l<end ; l++) */
492             }
493           }
494           for (l = b_start; l<b_end ; l++){ /* Bmat */
495             col = garray[bj[l]];
496             if (PetscBTLookup(table0,col)){
497               if (!PetscBTLookupSet(table_i,row+rstart)) {nidx_i[isz++] = row+rstart;}
498               break; /* for l = start; l<end ; l++) */
499             }
500           }
501         }
502       }
503     } /* if (n > 0) */
504 
505     if (i < is_max - 1){
506       idx_i  += n;   /* ptr to input is[i+1] array */
507       nidx_i += isz; /* ptr to output is[i+1] array */
508     }
509     nidx[1+i] = isz; /* size of new is[i] */
510   } /* for each is */
511   *data_new = nidx;
512   ierr = PetscBTDestroy(table0);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
516 
517 
518