xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ed1caa07d30b4bf52fa72dab33adf574bdef2af0)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
10 a slightly higher hash-table lookup cost; without it, it is not scalable (each process
11 stores an order-N integer array) but access is fast.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
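/*
   A minimal sketch of how the colmap built above is consulted elsewhere in this file
   (see MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()): translate a global column
   number gcol into a local column number of the off-diagonal block, or -1 if absent.
   The helper name is illustrative only.
*/
#if 0
static PetscErrorCode ExampleColmapLookup(Mat mat,PetscInt gcol,PetscInt *lcol)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* entries are stored shifted by one; 0 means "absent" */
#else
  *lcol = aij->colmap[gcol] - 1;  /* -1 means gcol does not occur in the off-diagonal block */
#endif
  PetscFunctionReturn(0);
}
#endif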
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
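/*
   A sketch of the search strategy shared by the two macros above, written out as a
   plain function for readability: a short binary search narrows the window [low,high)
   of the sorted column array rp to at most five entries, then a linear scan finds the
   matching column or the insertion point.  The function name is illustrative only.
*/
#if 0
static PetscInt ExampleFindColumn(const PetscInt rp[],PetscInt nrow,PetscInt col)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 5) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] >= col) break;  /* first entry >= col; caller tests rp[i] == col for a match */
  }
  return i;                   /* otherwise i is where col would be inserted */
}
#endif
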
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171 	    col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
209 
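/*
   A minimal caller-side sketch of the routine above, reached through the public
   MatSetValues() interface.  Indices are global; entries destined for rows owned by
   other processes are stashed and exchanged during assembly.  The helper name and the
   index/value data are illustrative only.
*/
#if 0
static PetscErrorCode ExampleSetValues(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rows[2] = {0,1},cols[3] = {0,5,9};
  PetscScalar    vals[6] = {1.0,2.0,3.0,
                            4.0,5.0,6.0};   /* a row-oriented 2x3 block */

  PetscFunctionBegin;
  ierr = MatSetValues(A,2,rows,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif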
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col --;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
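/*
   A minimal caller-side sketch of the routine above through MatGetValues(); as the
   error message above indicates, only locally owned rows may be queried, while the
   columns may lie anywhere.  The helper name is illustrative only.
*/
#if 0
static PetscErrorCode ExampleGetValues(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,cols[2] = {0,1};
  PetscScalar    vals[2];

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetValues(A,1,&rstart,2,cols,vals);CHKERRQ(ierr);  /* first locally owned row */
  PetscFunctionReturn(0);
}
#endif
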
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any process has disassembled; if so we must
318      also disassemble ourselves, in order that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change then we know that
321      no process has disassembled, so we can skip this check
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed by MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
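/*
   The standard pairing these two routines implement, from the caller's side:
   MatAssemblyBegin() starts the exchange of stashed off-process entries and
   MatAssemblyEnd() receives them, applies them with MatSetValues_MPIAIJ(), and then
   assembles the diagonal (A) and off-diagonal (B) sequential blocks.  A minimal
   sketch, with an illustrative helper name.
*/
#if 0
static PetscErrorCode ExampleAssemble(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* ... MatSetValues(A,...) calls on each process ... */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
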
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,N,*rows,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380   IS             istmp;
381 #if defined(PETSC_DEBUG)
382   PetscTruth     found = PETSC_FALSE;
383 #endif
384 
385   PetscFunctionBegin;
386   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
387   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
388 
389   /*  first count number of contributors to each processor */
390   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
391   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
392   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
393   j = 0;
394   for (i=0; i<N; i++) {
395     if (lastidx > (idx = rows[i])) j = 0;
396     lastidx = idx;
397     for (; j<size; j++) {
398       if (idx >= owners[j] && idx < owners[j+1]) {
399         nprocs[2*j]++;
400         nprocs[2*j+1] = 1;
401         owner[i] = j;
402 #if defined(PETSC_DEBUG)
403         found = PETSC_TRUE;
404 #endif
405         break;
406       }
407     }
408 #if defined(PETSC_DEBUG)
409     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
410     found = PETSC_FALSE;
411 #endif
412   }
413   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
414 
415   /* inform other processors of number of messages and max length*/
416   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
417 
418   /* post receives:   */
419   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
420   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
421   for (i=0; i<nrecvs; i++) {
422     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
423   }
424 
425   /* do sends:
426       1) starts[i] gives the starting index in svalues for the entries going to
427          the ith process
428   */
429   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
430   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
431   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
432   starts[0] = 0;
433   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
434   for (i=0; i<N; i++) {
435     svalues[starts[owner[i]]++] = rows[i];
436   }
437   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
438 
439   starts[0] = 0;
440   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
441   count = 0;
442   for (i=0; i<size; i++) {
443     if (nprocs[2*i+1]) {
444       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
445     }
446   }
447   ierr = PetscFree(starts);CHKERRQ(ierr);
448 
449   base = owners[rank];
450 
451   /*  wait on receives */
452   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
453   source = lens + nrecvs;
454   count  = nrecvs; slen = 0;
455   while (count) {
456     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
457     /* unpack receives into our local space */
458     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
459     source[imdex]  = recv_status.MPI_SOURCE;
460     lens[imdex]    = n;
461     slen          += n;
462     count--;
463   }
464   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
465 
466   /* move the data into the send scatter */
467   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
468   count = 0;
469   for (i=0; i<nrecvs; i++) {
470     values = rvalues + i*nmax;
471     for (j=0; j<lens[i]; j++) {
472       lrows[count++] = values[j] - base;
473     }
474   }
475   ierr = PetscFree(rvalues);CHKERRQ(ierr);
476   ierr = PetscFree(lens);CHKERRQ(ierr);
477   ierr = PetscFree(owner);CHKERRQ(ierr);
478   ierr = PetscFree(nprocs);CHKERRQ(ierr);
479 
480   /* actually zero the local rows */
481   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
482   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
483 
484   /*
485         Zero the required rows. If the "diagonal block" of the matrix
486      is square and the user wishes to set the diagonal we use separate
487      code so that MatSetValues() is not called for each diagonal entry,
488      which would allocate new memory with many mallocs and slow things down.
489 
490        Contributed by: Matthew Knepley
491   */
492   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
493   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
494   if (diag && (l->A->M == l->A->N)) {
495     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
496   } else if (diag) {
497     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
498     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
499       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
500 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
501     }
502     for (i = 0; i < slen; i++) {
503       row  = lrows[i] + rstart;
504       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
505     }
506     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
507     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
508   } else {
509     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
510   }
511   ierr = ISDestroy(istmp);CHKERRQ(ierr);
512   ierr = PetscFree(lrows);CHKERRQ(ierr);
513 
514   /* wait on sends */
515   if (nsends) {
516     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
517     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
518     ierr = PetscFree(send_status);CHKERRQ(ierr);
519   }
520   ierr = PetscFree(send_waits);CHKERRQ(ierr);
521   ierr = PetscFree(svalues);CHKERRQ(ierr);
522 
523   PetscFunctionReturn(0);
524 }
525 
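/*
   A minimal caller-side sketch of the routine above through MatZeroRows(): each
   process lists the global rows it wants zeroed (they need not be locally owned;
   the code above routes them to their owners), and the optional diagonal value is
   inserted afterwards.  The helper name and row indices are illustrative only.
*/
#if 0
static PetscErrorCode ExampleZeroRows(Mat A)
{
  PetscErrorCode ierr;
  IS             zrows;
  PetscInt       idx[2] = {0,7};
  PetscScalar    one = 1.0;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idx,&zrows);CHKERRQ(ierr);
  ierr = MatZeroRows(A,zrows,&one);CHKERRQ(ierr);
  ierr = ISDestroy(zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
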
526 #undef __FUNCT__
527 #define __FUNCT__ "MatMult_MPIAIJ"
528 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
529 {
530   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
531   PetscErrorCode ierr;
532   PetscInt       nt;
533 
534   PetscFunctionBegin;
535   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
536   if (nt != A->n) {
537     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
538   }
539   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
540   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
541   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
542   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
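/*
   What the routine above computes: with the input split into the locally owned part
   x_loc and the ghost values gathered into a->lvec, y = A_loc*x_loc + B*x_ghost, and
   the scatter filling a->lvec is overlapped with the local multiply.  A minimal
   caller-side sketch with illustrative names; the vector layouts must match the
   matrix, as checked at the top of the routine.
*/
#if 0
static PetscErrorCode ExampleMult(Mat A,Vec x)
{
  PetscErrorCode ierr;
  Vec            y;

  PetscFunctionBegin;
  ierr = VecCreateMPI(A->comm,A->m,A->M,&y);CHKERRQ(ierr);  /* row layout of A */
  ierr = MatMult(A,x,y);CHKERRQ(ierr);                      /* x must use the column layout */
  ierr = VecDestroy(y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
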
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_MPIAIJ"
548 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
563 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscTruth     merged;
568 
569   PetscFunctionBegin;
570   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
571   /* do nondiagonal part */
572   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
573   if (!merged) {
574     /* send it on its way */
575     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
576     /* do local part */
577     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
578     /* receive remote parts: note this assumes the values are not actually */
579     /* added into yy until the VecScatterEnd() below */
580     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
581   } else {
582     /* do local part */
583     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
584     /* send it on its way */
585     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
586     /* values were actually received in the Begin() but we still need to call this no-op End() */
587     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
588   }
589   PetscFunctionReturn(0);
590 }
591 
592 EXTERN_C_BEGIN
593 #undef __FUNCT__
594 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
595 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
596 {
597   MPI_Comm       comm;
598   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
599   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
600   IS             Me,Notme;
601   PetscErrorCode ierr;
602   PetscInt       M,N,first,last,*notme,i;
603   PetscMPIInt    size;
604 
605   PetscFunctionBegin;
606 
607   /* Easy test: the diagonal blocks must be transposes of each other */
608   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
609   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
610   if (!*f) PetscFunctionReturn(0);
611   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
613   if (size == 1) PetscFunctionReturn(0);
614 
615   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
616   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
617   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
618   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
619   for (i=0; i<first; i++) notme[i] = i;
620   for (i=last; i<M; i++) notme[i-last+first] = i;
621   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
622   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
623   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
624   Aoff = Aoffs[0];
625   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
626   Boff = Boffs[0];
627   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
628   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
629   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
630   ierr = ISDestroy(Me);CHKERRQ(ierr);
631   ierr = ISDestroy(Notme);CHKERRQ(ierr);
632 
633   PetscFunctionReturn(0);
634 }
635 EXTERN_C_END
636 
637 #undef __FUNCT__
638 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
639 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
640 {
641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
642   PetscErrorCode ierr;
643 
644   PetscFunctionBegin;
645   /* do nondiagonal part */
646   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
647   /* send it on its way */
648   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649   /* do local part */
650   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
651   /* receive remote parts */
652   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657   This only works correctly for square matrices where the subblock A->A is the
658    diagonal block
659 */
660 #undef __FUNCT__
661 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
662 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
663 {
664   PetscErrorCode ierr;
665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
666 
667   PetscFunctionBegin;
668   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
669   if (a->rstart != a->cstart || a->rend != a->cend) {
670     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
671   }
672   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatScale_MPIAIJ"
678 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
679 {
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681   PetscErrorCode ierr;
682 
683   PetscFunctionBegin;
684   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
685   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatDestroy_MPIAIJ"
691 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
692 {
693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697 #if defined(PETSC_USE_LOG)
698   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
699 #endif
700   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
701   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
704 #if defined (PETSC_USE_CTABLE)
705   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
706 #else
707   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
708 #endif
709   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
710   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
711   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
712   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
713   ierr = PetscFree(aij);CHKERRQ(ierr);
714 
715   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
716   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
717   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
718   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
719   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
720   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
721   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_Binary"
727 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
731   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
732   PetscErrorCode    ierr;
733   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
734   int               fd;
735   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
736   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
737   PetscScalar       *column_values;
738 
739   PetscFunctionBegin;
740   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
741   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
742   nz   = A->nz + B->nz;
743   if (!rank) {
744     header[0] = MAT_FILE_COOKIE;
745     header[1] = mat->M;
746     header[2] = mat->N;
747     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
749     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
750     /* get largest number of rows any processor has */
751     rlen = mat->m;
752     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
753     for (i=1; i<size; i++) {
754       rlen = PetscMax(rlen,range[i+1] - range[i]);
755     }
756   } else {
757     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
758     rlen = mat->m;
759   }
760 
761   /* load up the local row counts */
762   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
763   for (i=0; i<mat->m; i++) {
764     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
765   }
766 
767   /* store the row lengths to the file */
768   if (!rank) {
769     MPI_Status status;
770     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
771     for (i=1; i<size; i++) {
772       rlen = range[i+1] - range[i];
773       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
774       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
775     }
776   } else {
777     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
778   }
779   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
780 
781   /* load up the local column indices */
782   nzmax = nz; /* the 0th process needs as much space as the largest process needs */
783   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
784   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
785   cnt  = 0;
786   for (i=0; i<mat->m; i++) {
787     for (j=B->i[i]; j<B->i[i+1]; j++) {
788       if ( (col = garray[B->j[j]]) > cstart) break;
789       column_indices[cnt++] = col;
790     }
791     for (k=A->i[i]; k<A->i[i+1]; k++) {
792       column_indices[cnt++] = A->j[k] + cstart;
793     }
794     for (; j<B->i[i+1]; j++) {
795       column_indices[cnt++] = garray[B->j[j]];
796     }
797   }
798   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
799 
800   /* store the column indices to the file */
801   if (!rank) {
802     MPI_Status status;
803     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
806       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
807       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
808       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
809     }
810   } else {
811     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
812     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(column_indices);CHKERRQ(ierr);
815 
816   /* load up the local column values */
817   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
818   cnt  = 0;
819   for (i=0; i<mat->m; i++) {
820     for (j=B->i[i]; j<B->i[i+1]; j++) {
821       if ( garray[B->j[j]] > cstart) break;
822       column_values[cnt++] = B->a[j];
823     }
824     for (k=A->i[i]; k<A->i[i+1]; k++) {
825       column_values[cnt++] = A->a[k];
826     }
827     for (; j<B->i[i+1]; j++) {
828       column_values[cnt++] = B->a[j];
829     }
830   }
831   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
832 
833   /* store the column values to the file */
834   if (!rank) {
835     MPI_Status status;
836     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
837     for (i=1; i<size; i++) {
838       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
839       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
840       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
842     }
843   } else {
844     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
845     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(column_values);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
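/*
   The on-disk layout produced above, gathered onto process 0:

       header[4]        {MAT_FILE_COOKIE, M, N, global number of nonzeros}
       row lengths      one entry per global row
       column indices   per row: off-diagonal entries left of the diagonal block,
                        then the diagonal (A) block, then the remaining off-diagonal
                        entries, i.e. increasing global column order
       values           the matching numerical values in the same order
*/
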
851 #undef __FUNCT__
852 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
853 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
854 {
855   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
856   PetscErrorCode    ierr;
857   PetscMPIInt       rank = aij->rank,size = aij->size;
858   PetscTruth        isdraw,iascii,isbinary;
859   PetscViewer       sviewer;
860   PetscViewerFormat format;
861 
862   PetscFunctionBegin;
863   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
864   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
865   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
866   if (iascii) {
867     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
868     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
869       MatInfo    info;
870       PetscTruth inodes;
871 
872       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
873       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
875       if (!inodes) {
876         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
877 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
878       } else {
879         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
880 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
881       }
882       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
884       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
885       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
886       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
887       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_INFO) {
890       PetscInt   inodecount,inodelimit,*inodes;
891       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
892       if (inodes) {
893         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
896       }
897       PetscFunctionReturn(0);
898     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899       PetscFunctionReturn(0);
900     }
901   } else if (isbinary) {
902     if (size == 1) {
903       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
904       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
905     } else {
906       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
907     }
908     PetscFunctionReturn(0);
909   } else if (isdraw) {
910     PetscDraw  draw;
911     PetscTruth isnull;
912     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
913     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
914   }
915 
916   if (size == 1) {
917     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919   } else {
920     /* assemble the entire matrix onto first processor. */
921     Mat         A;
922     Mat_SeqAIJ  *Aloc;
923     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
924     PetscScalar *a;
925 
926     if (!rank) {
927       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
928     } else {
929       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
930     }
931     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
932     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
933     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
934     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqAIJ*)aij->A->data;
938     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     row = aij->rstart;
940     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
941     for (i=0; i<m; i++) {
942       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
943       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
944     }
945     aj = Aloc->j;
946     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
947 
948     /* copy over the B part */
949     Aloc = (Mat_SeqAIJ*)aij->B->data;
950     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
951     row  = aij->rstart;
952     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
953     ct   = cols;
954     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
955     for (i=0; i<m; i++) {
956       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
957       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
958     }
959     ierr = PetscFree(ct);CHKERRQ(ierr);
960     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962     /*
963        Everyone has to call to draw the matrix since the graphics waits are
964        synchronized across all processors that share the PetscDraw object
965     */
966     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
967     if (!rank) {
968       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
969       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
970     }
971     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
972     ierr = MatDestroy(A);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatView_MPIAIJ"
979 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
980 {
981   PetscErrorCode ierr;
982   PetscTruth     iascii,isdraw,issocket,isbinary;
983 
984   PetscFunctionBegin;
985   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
986   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
987   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
988   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
989   if (iascii || isdraw || isbinary || issocket) {
990     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
991   } else {
992     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
993   }
994   PetscFunctionReturn(0);
995 }
996 
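/*
   A minimal caller-side sketch of selecting the viewer formats handled above; the
   helper name is illustrative, and PETSC_VIEWER_STDOUT_WORLD is the usual ASCII viewer.
*/
#if 0
static PetscErrorCode ExampleViewInfo(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif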
997 
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatRelax_MPIAIJ"
1001 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1002 {
1003   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1004   PetscErrorCode ierr;
1005   Vec            bb1;
1006   PetscScalar    mone=-1.0;
1007 
1008   PetscFunctionBegin;
1009   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1010 
1011   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1012 
1013   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1014     if (flag & SOR_ZERO_INITIAL_GUESS) {
1015       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1016       its--;
1017     }
1018 
1019     while (its--) {
1020       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1021       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1022 
1023       /* update rhs: bb1 = bb - B*x */
1024       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1025       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1026 
1027       /* local sweep */
1028       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1029       CHKERRQ(ierr);
1030     }
1031   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1032     if (flag & SOR_ZERO_INITIAL_GUESS) {
1033       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1034       its--;
1035     }
1036     while (its--) {
1037       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039 
1040       /* update rhs: bb1 = bb - B*x */
1041       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1042       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1043 
1044       /* local sweep */
1045       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1046       CHKERRQ(ierr);
1047     }
1048   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1049     if (flag & SOR_ZERO_INITIAL_GUESS) {
1050       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1051       its--;
1052     }
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1067   }
1068 
1069   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1075 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1076 {
1077   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1078   Mat            A = mat->A,B = mat->B;
1079   PetscErrorCode ierr;
1080   PetscReal      isend[5],irecv[5];
1081 
1082   PetscFunctionBegin;
1083   info->block_size     = 1.0;
1084   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1085   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1086   isend[3] = info->memory;  isend[4] = info->mallocs;
1087   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1088   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1089   isend[3] += info->memory;  isend[4] += info->mallocs;
1090   if (flag == MAT_LOCAL) {
1091     info->nz_used      = isend[0];
1092     info->nz_allocated = isend[1];
1093     info->nz_unneeded  = isend[2];
1094     info->memory       = isend[3];
1095     info->mallocs      = isend[4];
1096   } else if (flag == MAT_GLOBAL_MAX) {
1097     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1098     info->nz_used      = irecv[0];
1099     info->nz_allocated = irecv[1];
1100     info->nz_unneeded  = irecv[2];
1101     info->memory       = irecv[3];
1102     info->mallocs      = irecv[4];
1103   } else if (flag == MAT_GLOBAL_SUM) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   }
1111   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1112   info->fill_ratio_needed = 0;
1113   info->factor_mallocs    = 0;
1114   info->rows_global       = (double)matin->M;
1115   info->columns_global    = (double)matin->N;
1116   info->rows_local        = (double)matin->m;
1117   info->columns_local     = (double)matin->N;
1118 
1119   PetscFunctionReturn(0);
1120 }
1121 
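/*
   A minimal caller-side sketch of the routine above through MatGetInfo();
   MAT_GLOBAL_SUM reduces the per-process counts over the matrix communicator.
   The helper name is illustrative only.
*/
#if 0
static PetscErrorCode ExampleGetInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(A->comm,"nz used %g, allocated %g, mallocs %g\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
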
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetOption_MPIAIJ"
1124 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   switch (op) {
1131   case MAT_NO_NEW_NONZERO_LOCATIONS:
1132   case MAT_YES_NEW_NONZERO_LOCATIONS:
1133   case MAT_COLUMNS_UNSORTED:
1134   case MAT_COLUMNS_SORTED:
1135   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1136   case MAT_KEEP_ZEROED_ROWS:
1137   case MAT_NEW_NONZERO_LOCATION_ERR:
1138   case MAT_USE_INODES:
1139   case MAT_DO_NOT_USE_INODES:
1140   case MAT_IGNORE_ZERO_ENTRIES:
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_ROW_ORIENTED:
1145     a->roworiented = PETSC_TRUE;
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148     break;
1149   case MAT_ROWS_SORTED:
1150   case MAT_ROWS_UNSORTED:
1151   case MAT_YES_NEW_DIAGONALS:
1152     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1153     break;
1154   case MAT_COLUMN_ORIENTED:
1155     a->roworiented = PETSC_FALSE;
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_IGNORE_OFF_PROC_ENTRIES:
1160     a->donotstash = PETSC_TRUE;
1161     break;
1162   case MAT_NO_NEW_DIAGONALS:
1163     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1164   case MAT_SYMMETRIC:
1165   case MAT_STRUCTURALLY_SYMMETRIC:
1166   case MAT_HERMITIAN:
1167   case MAT_SYMMETRY_ETERNAL:
1168     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1169     break;
1170   case MAT_NOT_SYMMETRIC:
1171   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1172   case MAT_NOT_HERMITIAN:
1173   case MAT_NOT_SYMMETRY_ETERNAL:
1174     break;
1175   default:
1176     SETERRQ(PETSC_ERR_SUP,"unknown option");
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
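/*
   A minimal caller-side sketch of the routine above.  MAT_IGNORE_OFF_PROC_ENTRIES is
   handled only at this parallel level (off-process entries are dropped rather than
   stashed), while options such as MAT_NO_NEW_NONZERO_LOCATIONS are forwarded to both
   sequential blocks.  The helper name is illustrative only.
*/
#if 0
static PetscErrorCode ExampleSetOptions(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
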
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatGetRow_MPIAIJ"
1183 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1184 {
1185   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1186   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1187   PetscErrorCode ierr;
1188   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1189   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1190   PetscInt       *cmap,*idx_p;
1191 
1192   PetscFunctionBegin;
1193   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1194   mat->getrowactive = PETSC_TRUE;
1195 
1196   if (!mat->rowvalues && (idx || v)) {
1197     /*
1198         allocate enough space to hold information from the longest row.
1199     */
1200     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1201     PetscInt     max = 1,tmp;
1202     for (i=0; i<matin->m; i++) {
1203       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1204       if (max < tmp) { max = tmp; }
1205     }
1206     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1207     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1208   }
1209 
1210   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1211   lrow = row - rstart;
1212 
1213   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1214   if (!v)   {pvA = 0; pvB = 0;}
1215   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1216   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1217   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1218   nztot = nzA + nzB;
1219 
1220   cmap  = mat->garray;
1221   if (v  || idx) {
1222     if (nztot) {
1223       /* Sort by increasing column numbers, assuming A and B already sorted */
1224       PetscInt imark = -1;
1225       if (v) {
1226         *v = v_p = mat->rowvalues;
1227         for (i=0; i<nzB; i++) {
1228           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1229           else break;
1230         }
1231         imark = i;
1232         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1233         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1234       }
1235       if (idx) {
1236         *idx = idx_p = mat->rowindices;
1237         if (imark > -1) {
1238           for (i=0; i<imark; i++) {
1239             idx_p[i] = cmap[cworkB[i]];
1240           }
1241         } else {
1242           for (i=0; i<nzB; i++) {
1243             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1244             else break;
1245           }
1246           imark = i;
1247         }
1248         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1249         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1250       }
1251     } else {
1252       if (idx) *idx = 0;
1253       if (v)   *v   = 0;
1254     }
1255   }
1256   *nz = nztot;
1257   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1258   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
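/*
   The usual MatGetRow()/MatRestoreRow() pattern served by the two routines above:
   only locally owned rows may be requested, the returned column indices are global,
   and each MatGetRow() must be matched by MatRestoreRow() before the next row is
   fetched.  A minimal sketch with an illustrative helper name.
*/
#if 0
static PetscErrorCode ExampleRowLoop(Mat A)
{
  PetscErrorCode ierr;
  PetscInt       r,rstart,rend,ncols,*cols;
  PetscScalar    *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (r=rstart; r<rend; r++) {
    ierr = MatGetRow(A,r,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... use the ncols entries of cols[] and vals[] for global row r ... */
    ierr = MatRestoreRow(A,r,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
#endif
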
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1264 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1265 {
1266   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1267 
1268   PetscFunctionBegin;
1269   if (!aij->getrowactive) {
1270     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1271   }
1272   aij->getrowactive = PETSC_FALSE;
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatNorm_MPIAIJ"
1278 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1279 {
1280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1281   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1282   PetscErrorCode ierr;
1283   PetscInt       i,j,cstart = aij->cstart;
1284   PetscReal      sum = 0.0;
1285   PetscScalar    *v;
1286 
1287   PetscFunctionBegin;
1288   if (aij->size == 1) {
1289     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1290   } else {
1291     if (type == NORM_FROBENIUS) {
1292       v = amat->a;
1293       for (i=0; i<amat->nz; i++) {
1294 #if defined(PETSC_USE_COMPLEX)
1295         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296 #else
1297         sum += (*v)*(*v); v++;
1298 #endif
1299       }
1300       v = bmat->a;
1301       for (i=0; i<bmat->nz; i++) {
1302 #if defined(PETSC_USE_COMPLEX)
1303         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1304 #else
1305         sum += (*v)*(*v); v++;
1306 #endif
1307       }
1308       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1309       *norm = sqrt(*norm);
1310     } else if (type == NORM_1) { /* max column norm */
1311       PetscReal *tmp,*tmp2;
1312       PetscInt    *jj,*garray = aij->garray;
1313       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1314       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1315       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1316       *norm = 0.0;
1317       v = amat->a; jj = amat->j;
1318       for (j=0; j<amat->nz; j++) {
1319         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1320       }
1321       v = bmat->a; jj = bmat->j;
1322       for (j=0; j<bmat->nz; j++) {
1323         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1324       }
1325       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1326       for (j=0; j<mat->N; j++) {
1327         if (tmp2[j] > *norm) *norm = tmp2[j];
1328       }
1329       ierr = PetscFree(tmp);CHKERRQ(ierr);
1330       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1331     } else if (type == NORM_INFINITY) { /* max row norm */
1332       PetscReal ntemp = 0.0;
1333       for (j=0; j<aij->A->m; j++) {
1334         v = amat->a + amat->i[j];
1335         sum = 0.0;
1336         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1337           sum += PetscAbsScalar(*v); v++;
1338         }
1339         v = bmat->a + bmat->i[j];
1340         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1341           sum += PetscAbsScalar(*v); v++;
1342         }
1343         if (sum > ntemp) ntemp = sum;
1344       }
1345       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1346     } else {
1347       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1348     }
1349   }
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatTranspose_MPIAIJ"
1355 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1356 {
1357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1358   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1361   Mat            B;
1362   PetscScalar    *array;
1363 
1364   PetscFunctionBegin;
1365   if (!matout && M != N) {
1366     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1367   }
1368 
1369   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1370   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1371   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1372 
1373   /* copy over the A part */
1374   Aloc = (Mat_SeqAIJ*)a->A->data;
1375   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1376   row = a->rstart;
1377   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1378   for (i=0; i<m; i++) {
1379     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1380     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1381   }
1382   aj = Aloc->j;
1383   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1384 
1385   /* copy over the B part */
1386   Aloc = (Mat_SeqAIJ*)a->B->data;
1387   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1388   row  = a->rstart;
1389   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1390   ct   = cols;
1391   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1392   for (i=0; i<m; i++) {
1393     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1394     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1395   }
1396   ierr = PetscFree(ct);CHKERRQ(ierr);
1397   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1399   if (matout) {
1400     *matout = B;
1401   } else {
1402     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1409 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1410 {
1411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1412   Mat            a = aij->A,b = aij->B;
1413   PetscErrorCode ierr;
1414   PetscInt       s1,s2,s3;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1418   if (rr) {
1419     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1420     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1421     /* Overlap communication with computation. */
1422     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1423   }
1424   if (ll) {
1425     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1426     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1427     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1428   }
1429   /* scale the diagonal block */
1430   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1431 
1432   if (rr) {
1433     /* Do a scatter end and then right scale the off-diagonal block */
1434     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1435     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1436   }
1437 
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1444 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1445 {
1446   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   if (!a->rank) {
1451     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1458 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1459 {
1460   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1461   PetscErrorCode ierr;
1462 
1463   PetscFunctionBegin;
1464   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1465   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1470 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatEqual_MPIAIJ"
1482 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1483 {
1484   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1485   Mat            a,b,c,d;
1486   PetscTruth     flg;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   a = matA->A; b = matA->B;
1491   c = matB->A; d = matB->B;
1492 
1493   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1494   if (flg) {
1495     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1496   }
1497   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatCopy_MPIAIJ"
1503 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1504 {
1505   PetscErrorCode ierr;
1506   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1507   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1508 
1509   PetscFunctionBegin;
1510   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1511   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1512     /* because of the column compression in the off-processor part of the matrix a->B,
1513        the number of columns in a->B and b->B may be different, hence we cannot call
1514        MatCopy() directly on the two parts. If need be, we could provide a copy more
1515        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
1516        then copying the submatrices */
1517     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1518   } else {
1519     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1520     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1527 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1528 {
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #include "petscblaslapack.h"
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatAXPY_MPIAIJ"
1539 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1540 {
1541   PetscErrorCode ierr;
1542   PetscInt       i;
1543   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1544   PetscBLASInt   bnz,one=1;
1545   Mat_SeqAIJ     *x,*y;
1546 
1547   PetscFunctionBegin;
1548   if (str == SAME_NONZERO_PATTERN) {
1549     x = (Mat_SeqAIJ *)xx->A->data;
1550     y = (Mat_SeqAIJ *)yy->A->data;
1551     bnz = (PetscBLASInt)x->nz;
1552     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1553     x = (Mat_SeqAIJ *)xx->B->data;
1554     y = (Mat_SeqAIJ *)yy->B->data;
1555     bnz = (PetscBLASInt)x->nz;
1556     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1557   } else if (str == SUBSET_NONZERO_PATTERN) {
1558     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1559 
1560     x = (Mat_SeqAIJ *)xx->B->data;
1561     y = (Mat_SeqAIJ *)yy->B->data;
1562     if (y->xtoy && y->XtoY != xx->B) {
1563       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1564       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1565     }
1566     if (!y->xtoy) { /* get xtoy */
1567       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1568       y->XtoY = xx->B;
1569     }
1570     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1571   } else {
1572     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 /* -------------------------------------------------------------------*/
1578 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1579        MatGetRow_MPIAIJ,
1580        MatRestoreRow_MPIAIJ,
1581        MatMult_MPIAIJ,
1582 /* 4*/ MatMultAdd_MPIAIJ,
1583        MatMultTranspose_MPIAIJ,
1584        MatMultTransposeAdd_MPIAIJ,
1585        0,
1586        0,
1587        0,
1588 /*10*/ 0,
1589        0,
1590        0,
1591        MatRelax_MPIAIJ,
1592        MatTranspose_MPIAIJ,
1593 /*15*/ MatGetInfo_MPIAIJ,
1594        MatEqual_MPIAIJ,
1595        MatGetDiagonal_MPIAIJ,
1596        MatDiagonalScale_MPIAIJ,
1597        MatNorm_MPIAIJ,
1598 /*20*/ MatAssemblyBegin_MPIAIJ,
1599        MatAssemblyEnd_MPIAIJ,
1600        0,
1601        MatSetOption_MPIAIJ,
1602        MatZeroEntries_MPIAIJ,
1603 /*25*/ MatZeroRows_MPIAIJ,
1604        0,
1605        0,
1606        0,
1607        0,
1608 /*30*/ MatSetUpPreallocation_MPIAIJ,
1609        0,
1610        0,
1611        0,
1612        0,
1613 /*35*/ MatDuplicate_MPIAIJ,
1614        0,
1615        0,
1616        0,
1617        0,
1618 /*40*/ MatAXPY_MPIAIJ,
1619        MatGetSubMatrices_MPIAIJ,
1620        MatIncreaseOverlap_MPIAIJ,
1621        MatGetValues_MPIAIJ,
1622        MatCopy_MPIAIJ,
1623 /*45*/ MatPrintHelp_MPIAIJ,
1624        MatScale_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628 /*50*/ MatSetBlockSize_MPIAIJ,
1629        0,
1630        0,
1631        0,
1632        0,
1633 /*55*/ MatFDColoringCreate_MPIAIJ,
1634        0,
1635        MatSetUnfactored_MPIAIJ,
1636        0,
1637        0,
1638 /*60*/ MatGetSubMatrix_MPIAIJ,
1639        MatDestroy_MPIAIJ,
1640        MatView_MPIAIJ,
1641        MatGetPetscMaps_Petsc,
1642        0,
1643 /*65*/ 0,
1644        0,
1645        0,
1646        0,
1647        0,
1648 /*70*/ 0,
1649        0,
1650        MatSetColoring_MPIAIJ,
1651 #if defined(PETSC_HAVE_ADIC)
1652        MatSetValuesAdic_MPIAIJ,
1653 #else
1654        0,
1655 #endif
1656        MatSetValuesAdifor_MPIAIJ,
1657 /*75*/ 0,
1658        0,
1659        0,
1660        0,
1661        0,
1662 /*80*/ 0,
1663        0,
1664        0,
1665        0,
1666 /*84*/ MatLoad_MPIAIJ,
1667        0,
1668        0,
1669        0,
1670        0,
1671        0,
1672 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1673        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1674        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1675        MatPtAP_Basic,
1676        MatPtAPSymbolic_MPIAIJ,
1677 /*95*/ MatPtAPNumeric_MPIAIJ,
1678        0,
1679        0,
1680        0,
1681        0,
1682 /*100*/0,
1683        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1684        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1685 };
1686 
1687 /* ----------------------------------------------------------------------------------------*/
1688 
1689 EXTERN_C_BEGIN
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1692 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1693 {
1694   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1695   PetscErrorCode ierr;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1699   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1700   PetscFunctionReturn(0);
1701 }
1702 EXTERN_C_END
1703 
1704 EXTERN_C_BEGIN
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1707 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1708 {
1709   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1710   PetscErrorCode ierr;
1711 
1712   PetscFunctionBegin;
1713   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1714   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 EXTERN_C_END
1718 
1719 #include "petscpc.h"
1720 EXTERN_C_BEGIN
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1723 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1724 {
1725   Mat_MPIAIJ     *b;
1726   PetscErrorCode ierr;
1727   PetscInt       i;
1728 
1729   PetscFunctionBegin;
1730   B->preallocated = PETSC_TRUE;
1731   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1732   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1733   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1734   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1735   if (d_nnz) {
1736     for (i=0; i<B->m; i++) {
1737       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1738     }
1739   }
1740   if (o_nnz) {
1741     for (i=0; i<B->m; i++) {
1742       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1743     }
1744   }
1745   b = (Mat_MPIAIJ*)B->data;
1746   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1747   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1748 
1749   PetscFunctionReturn(0);
1750 }
1751 EXTERN_C_END
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1755 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1756 {
1757   Mat            mat;
1758   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1759   PetscErrorCode ierr;
1760 
1761   PetscFunctionBegin;
1762   *newmat       = 0;
1763   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1764   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1765   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1766   a    = (Mat_MPIAIJ*)mat->data;
1767 
1768   mat->factor       = matin->factor;
1769   mat->bs           = matin->bs;
1770   mat->assembled    = PETSC_TRUE;
1771   mat->insertmode   = NOT_SET_VALUES;
1772   mat->preallocated = PETSC_TRUE;
1773 
1774   a->rstart       = oldmat->rstart;
1775   a->rend         = oldmat->rend;
1776   a->cstart       = oldmat->cstart;
1777   a->cend         = oldmat->cend;
1778   a->size         = oldmat->size;
1779   a->rank         = oldmat->rank;
1780   a->donotstash   = oldmat->donotstash;
1781   a->roworiented  = oldmat->roworiented;
1782   a->rowindices   = 0;
1783   a->rowvalues    = 0;
1784   a->getrowactive = PETSC_FALSE;
1785 
1786   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1787   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1788   if (oldmat->colmap) {
1789 #if defined (PETSC_USE_CTABLE)
1790     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1791 #else
1792     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1793     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1794     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1795 #endif
1796   } else a->colmap = 0;
1797   if (oldmat->garray) {
1798     PetscInt len;
1799     len  = oldmat->B->n;
1800     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1801     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1802     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1803   } else a->garray = 0;
1804 
1805   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1806   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1807   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1808   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1809   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1810   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1811   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1812   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1813   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1814   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1815   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1816   *newmat = mat;
1817   PetscFunctionReturn(0);
1818 }
1819 
1820 #include "petscsys.h"
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatLoad_MPIAIJ"
1824 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1825 {
1826   Mat            A;
1827   PetscScalar    *vals,*svals;
1828   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1829   MPI_Status     status;
1830   PetscErrorCode ierr;
1831   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1832   PetscInt       i,nz,j,rstart,rend,mmax;
1833   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1834   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1835   PetscInt       cend,cstart,n,*rowners;
1836   int            fd;
1837 
1838   PetscFunctionBegin;
1839   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1840   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1841   if (!rank) {
1842     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1843     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1844     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1845   }
1846 
1847   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1848   M = header[1]; N = header[2];
1849   /* determine ownership of all rows */
1850   m    = M/size + ((M % size) > rank);
1851   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1852   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1853 
1854   /* First process needs enough room for process with most rows */
1855   if (!rank) {
1856     mmax       = rowners[1];
1857     for (i=2; i<size; i++) {
1858       mmax = PetscMax(mmax,rowners[i]);
1859     }
1860   } else mmax = m;
1861 
1862   rowners[0] = 0;
1863   for (i=2; i<=size; i++) {
1864     mmax       = PetscMax(mmax,rowners[i]);
1865     rowners[i] += rowners[i-1];
1866   }
1867   rstart = rowners[rank];
1868   rend   = rowners[rank+1];
1869 
1870   /* distribute row lengths to all processors */
1871   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1872   if (!rank) {
1873     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1874     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1875     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1876     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1877     for (j=0; j<m; j++) {
1878       procsnz[0] += ourlens[j];
1879     }
1880     for (i=1; i<size; i++) {
1881       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1882       /* calculate the number of nonzeros on each processor */
1883       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1884         procsnz[i] += rowlengths[j];
1885       }
1886       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1887     }
1888     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1889   } else {
1890     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1891   }
1892 
1893   if (!rank) {
1894     /* determine max buffer needed and allocate it */
1895     maxnz = 0;
1896     for (i=0; i<size; i++) {
1897       maxnz = PetscMax(maxnz,procsnz[i]);
1898     }
1899     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1900 
1901     /* read in my part of the matrix column indices  */
1902     nz   = procsnz[0];
1903     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1904     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1905 
1906     /* read in everyone else's part and ship it off */
1907     for (i=1; i<size; i++) {
1908       nz   = procsnz[i];
1909       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1910       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1911     }
1912     ierr = PetscFree(cols);CHKERRQ(ierr);
1913   } else {
1914     /* determine buffer space needed for message */
1915     nz = 0;
1916     for (i=0; i<m; i++) {
1917       nz += ourlens[i];
1918     }
1919     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1920 
1921     /* receive message of column indices */
1922     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1923     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1924     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1925   }
1926 
1927   /* determine column ownership if matrix is not square */
1928   if (N != M) {
1929     n      = N/size + ((N % size) > rank);
1930     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1931     cstart = cend - n;
1932   } else {
1933     cstart = rstart;
1934     cend   = rend;
1935     n      = cend - cstart;
1936   }
1937 
1938   /* loop over local rows, determining number of off diagonal entries */
1939   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1940   jj = 0;
1941   for (i=0; i<m; i++) {
1942     for (j=0; j<ourlens[i]; j++) {
1943       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1944       jj++;
1945     }
1946   }
1947 
1948   /* create our matrix */
1949   for (i=0; i<m; i++) {
1950     ourlens[i] -= offlens[i];
1951   }
1952   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1953   ierr = MatSetType(A,type);CHKERRQ(ierr);
1954   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1955 
1956   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1957   for (i=0; i<m; i++) {
1958     ourlens[i] += offlens[i];
1959   }
1960 
1961   if (!rank) {
1962     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1963 
1964     /* read in my part of the matrix numerical values  */
1965     nz   = procsnz[0];
1966     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1967 
1968     /* insert into matrix */
1969     jj      = rstart;
1970     smycols = mycols;
1971     svals   = vals;
1972     for (i=0; i<m; i++) {
1973       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1974       smycols += ourlens[i];
1975       svals   += ourlens[i];
1976       jj++;
1977     }
1978 
1979     /* read in other processors and ship out */
1980     for (i=1; i<size; i++) {
1981       nz   = procsnz[i];
1982       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1983       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
1984     }
1985     ierr = PetscFree(procsnz);CHKERRQ(ierr);
1986   } else {
1987     /* receive numeric values */
1988     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1989 
1990     /* receive message of values */
1991     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
1992     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
1993     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1994 
1995     /* insert into matrix */
1996     jj      = rstart;
1997     smycols = mycols;
1998     svals   = vals;
1999     for (i=0; i<m; i++) {
2000       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2001       smycols += ourlens[i];
2002       svals   += ourlens[i];
2003       jj++;
2004     }
2005   }
2006   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2007   ierr = PetscFree(vals);CHKERRQ(ierr);
2008   ierr = PetscFree(mycols);CHKERRQ(ierr);
2009   ierr = PetscFree(rowners);CHKERRQ(ierr);
2010 
2011   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2012   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   *newmat = A;
2014   PetscFunctionReturn(0);
2015 }
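
/*
   A minimal usage sketch (not part of the library source): the loader above is reached
   through the generic MatLoad() interface.  Here "viewer" is assumed to be a binary
   PetscViewer that has already been opened for reading; the variable names are
   illustrative only.

      Mat A;

      ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
*/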
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2019 /*
2020     Not great since it makes two copies of the submatrix: first a local SeqAIJ
2021   copy, and then the final result obtained by concatenating the local matrices.
2022   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2023 */
2024 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2025 {
2026   PetscErrorCode ierr;
2027   PetscMPIInt    rank,size;
2028   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2029   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2030   Mat            *local,M,Mreuse;
2031   PetscScalar    *vwork,*aa;
2032   MPI_Comm       comm = mat->comm;
2033   Mat_SeqAIJ     *aij;
2034 
2035 
2036   PetscFunctionBegin;
2037   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2038   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2039 
2040   if (call ==  MAT_REUSE_MATRIX) {
2041     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2042     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2043     local = &Mreuse;
2044     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2045   } else {
2046     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2047     Mreuse = *local;
2048     ierr   = PetscFree(local);CHKERRQ(ierr);
2049   }
2050 
2051   /*
2052       m - number of local rows
2053       n - number of columns (same on all processors)
2054       rstart - first row in new global matrix generated
2055   */
2056   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2057   if (call == MAT_INITIAL_MATRIX) {
2058     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2059     ii  = aij->i;
2060     jj  = aij->j;
2061 
2062     /*
2063         Determine the number of non-zeros in the diagonal and off-diagonal
2064         portions of the matrix in order to do correct preallocation
2065     */
2066 
2067     /* first get start and end of "diagonal" columns */
2068     if (csize == PETSC_DECIDE) {
2069       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2070       if (mglobal == n) { /* square matrix */
2071         nlocal = m;
2072       } else {
2073         nlocal = n/size + ((n % size) > rank);
2074       }
2075     } else {
2076       nlocal = csize;
2077     }
2078     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2079     rstart = rend - nlocal;
2080     if (rank == size - 1 && rend != n) {
2081       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2082     }
2083 
2084     /* next, compute all the lengths */
2085     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2086     olens = dlens + m;
2087     for (i=0; i<m; i++) {
2088       jend = ii[i+1] - ii[i];
2089       olen = 0;
2090       dlen = 0;
2091       for (j=0; j<jend; j++) {
2092         if (*jj < rstart || *jj >= rend) olen++;
2093         else dlen++;
2094         jj++;
2095       }
2096       olens[i] = olen;
2097       dlens[i] = dlen;
2098     }
2099     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2100     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2101     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2102     ierr = PetscFree(dlens);CHKERRQ(ierr);
2103   } else {
2104     PetscInt ml,nl;
2105 
2106     M = *newmat;
2107     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2108     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2109     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2110     /*
2111          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2112        rather than the slower MatSetValues().
2113     */
2114     M->was_assembled = PETSC_TRUE;
2115     M->assembled     = PETSC_FALSE;
2116   }
2117   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2118   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2119   ii  = aij->i;
2120   jj  = aij->j;
2121   aa  = aij->a;
2122   for (i=0; i<m; i++) {
2123     row   = rstart + i;
2124     nz    = ii[i+1] - ii[i];
2125     cwork = jj;     jj += nz;
2126     vwork = aa;     aa += nz;
2127     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2128   }
2129 
2130   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2132   *newmat = M;
2133 
2134   /* save submatrix used in processor for next request */
2135   if (call ==  MAT_INITIAL_MATRIX) {
2136     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2137     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2138   }
2139 
2140   PetscFunctionReturn(0);
2141 }
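
/*
   A minimal usage sketch (not part of the library source): extract the locally owned
   rows rstart..rend-1 and all N columns of mat through the generic MatGetSubMatrix()
   interface implemented above.  The index sets and variable names below are
   illustrative assumptions.

      IS  isrow,iscol;
      Mat sub;

      ierr = ISCreateStride(comm,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(comm,N,0,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/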
2142 
2143 EXTERN_C_BEGIN
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2146 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2147 {
2148   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2149   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2150   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2151   const PetscInt *JJ;
2152   PetscScalar    *values;
2153   PetscErrorCode ierr;
2154 
2155   PetscFunctionBegin;
2156 #if defined(PETSC_OPT_g)
2157   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2158 #endif
2159   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2160   o_nnz = d_nnz + m;
2161 
2162   for (i=0; i<m; i++) {
2163     nnz     = I[i+1]- I[i];
2164     JJ      = J + I[i];
2165     nnz_max = PetscMax(nnz_max,nnz);
2166 #if defined(PETSC_OPT_g)
2167     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2168 #endif
2169     for (j=0; j<nnz; j++) {
2170       if (*JJ >= cstart) break;
2171       JJ++;
2172     }
2173     d = 0;
2174     for (; j<nnz; j++) {
2175       if (*JJ++ >= cend) break;
2176       d++;
2177     }
2178     d_nnz[i] = d;
2179     o_nnz[i] = nnz - d;
2180   }
2181   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2182   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2183 
2184   if (v) values = (PetscScalar*)v;
2185   else {
2186     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2187     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2188   }
2189 
2190   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2191   for (i=0; i<m; i++) {
2192     ii   = i + rstart;
2193     nnz  = I[i+1]- I[i];
2194     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2195   }
2196   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2199 
2200   if (!v) {
2201     ierr = PetscFree(values);CHKERRQ(ierr);
2202   }
2203   PetscFunctionReturn(0);
2204 }
2205 EXTERN_C_END
2206 
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2209 /*@C
2210    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2211    (the default parallel PETSc format).
2212 
2213    Collective on MPI_Comm
2214 
2215    Input Parameters:
2216 +  A - the matrix
2217 .  i - the indices into j for the start of each local row (starts with zero)
2218 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2219 -  v - optional values in the matrix
2220 
2221    Level: developer
2222 
2223 .keywords: matrix, aij, compressed row, sparse, parallel
2224 
2225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2226 @*/
2227 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2228 {
2229   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2230 
2231   PetscFunctionBegin;
2232   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2233   if (f) {
2234     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2235   }
2236   PetscFunctionReturn(0);
2237 }
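
/*
   A minimal usage sketch (not part of the library source) of the CSR preallocation
   documented above.  The local sizes and the arrays ia, ja, va are illustrative
   assumptions; note from the implementation above that the routine also inserts the
   values and assembles the matrix.

      Mat         B;
      PetscInt    ia[] = {0,2,4};             row pointers for 2 local rows
      PetscInt    ja[] = {0,3,1,4};           global column indices, sorted per row
      PetscScalar va[] = {1.0,2.0,3.0,4.0};

      ierr = MatCreate(comm,2,PETSC_DECIDE,PETSC_DETERMINE,5,&B);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(B,ia,ja,va);CHKERRQ(ierr);
*/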
2238 
2239 #undef __FUNCT__
2240 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2241 /*@C
2242    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2243    (the default parallel PETSc format).  For good matrix assembly performance
2244    the user should preallocate the matrix storage by setting the parameters
2245    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2246    performance can be increased by more than a factor of 50.
2247 
2248    Collective on MPI_Comm
2249 
2250    Input Parameters:
2251 +  A - the matrix
2252 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2253            (same value is used for all local rows)
2254 .  d_nnz - array containing the number of nonzeros in the various rows of the
2255            DIAGONAL portion of the local submatrix (possibly different for each row)
2256            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2257            The size of this array is equal to the number of local rows, i.e., 'm'.
2258            You must leave room for the diagonal entry even if it is zero.
2259 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2260            submatrix (same value is used for all local rows).
2261 -  o_nnz - array containing the number of nonzeros in the various rows of the
2262            OFF-DIAGONAL portion of the local submatrix (possibly different for
2263            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2264            structure. The size of this array is equal to the number
2265            of local rows, i.e., 'm'.
2266 
2267    If the *_nnz parameter is given then the *_nz parameter is ignored
2268 
2269    The AIJ format (also called the Yale sparse matrix format or
2270    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2271    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2272 
2273    The parallel matrix is partitioned such that the first m0 rows belong to
2274    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2275    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2276 
2277    The DIAGONAL portion of the local submatrix of a processor can be defined
2278    as the submatrix which is obtained by extracting the part corresponding
2279    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2280    first row that belongs to the processor, and r2 is the last row belonging
2281    to this processor. This is a square mxm matrix. The remaining portion
2282    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2283 
2284    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2285 
2286    Example usage:
2287 
2288    Consider the following 8x8 matrix with 34 non-zero values, that is
2289    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2290    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2291    as follows:
2292 
2293 .vb
2294             1  2  0  |  0  3  0  |  0  4
2295     Proc0   0  5  6  |  7  0  0  |  8  0
2296             9  0 10  | 11  0  0  | 12  0
2297     -------------------------------------
2298            13  0 14  | 15 16 17  |  0  0
2299     Proc1   0 18  0  | 19 20 21  |  0  0
2300             0  0  0  | 22 23  0  | 24  0
2301     -------------------------------------
2302     Proc2  25 26 27  |  0  0 28  | 29  0
2303            30  0  0  | 31 32 33  |  0 34
2304 .ve
2305 
2306    This can be represented as a collection of submatrices as:
2307 
2308 .vb
2309       A B C
2310       D E F
2311       G H I
2312 .ve
2313 
2314    Where the submatrices A,B,C are owned by proc0, D,E,F are
2315    owned by proc1, G,H,I are owned by proc2.
2316 
2317    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2318    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2319    The 'M','N' parameters are 8,8, and have the same values on all procs.
2320 
2321    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2322    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2323    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2324    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2325    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2326    matrix, and [DF] as another SeqAIJ matrix.
2327 
2328    When d_nz, o_nz parameters are specified, d_nz storage elements are
2329    allocated for every row of the local diagonal submatrix, and o_nz
2330    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2331    One way to choose d_nz and o_nz is to use the max nonzeros per local
2332    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2333    In this case, the values of d_nz,o_nz are:
2334 .vb
2335      proc0 : dnz = 2, o_nz = 2
2336      proc1 : dnz = 3, o_nz = 2
2337      proc2 : dnz = 1, o_nz = 4
2338 .ve
2339    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2340    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2341    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2342    34 values.
2343 
2344    When d_nnz, o_nnz parameters are specified, the storage is specified
2345    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2346    In the above case the values for d_nnz,o_nnz are:
2347 .vb
2348      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2349      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2350      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2351 .ve
2352    Here the space allocated is the sum of all the above values, i.e., 34, and
2353    hence pre-allocation is perfect.
2354 
2355    Level: intermediate
2356 
2357 .keywords: matrix, aij, compressed row, sparse, parallel
2358 
2359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2360           MPIAIJ
2361 @*/
2362 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2363 {
2364   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2365 
2366   PetscFunctionBegin;
2367   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2368   if (f) {
2369     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2370   }
2371   PetscFunctionReturn(0);
2372 }
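
/*
   A minimal usage sketch (not part of the library source), mirroring the proc0 rows of
   the 8x8 example in the manual page above: create the matrix, set its type, and
   preallocate with per-row counts before inserting values.  The variable names and the
   communicator comm are illustrative assumptions.

      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatCreate(comm,3,3,8,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/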
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatCreateMPIAIJ"
2376 /*@C
2377    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2378    (the default parallel PETSc format).  For good matrix assembly performance
2379    the user should preallocate the matrix storage by setting the parameters
2380    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2381    performance can be increased by more than a factor of 50.
2382 
2383    Collective on MPI_Comm
2384 
2385    Input Parameters:
2386 +  comm - MPI communicator
2387 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2388            This value should be the same as the local size used in creating the
2389            y vector for the matrix-vector product y = Ax.
2390 .  n - This value should be the same as the local size used in creating the
2391        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2392        calculated if N is given) For square matrices n is almost always m.
2393 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2394 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2395 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2396            (same value is used for all local rows)
2397 .  d_nnz - array containing the number of nonzeros in the various rows of the
2398            DIAGONAL portion of the local submatrix (possibly different for each row)
2399            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2400            The size of this array is equal to the number of local rows, i.e., 'm'.
2401            You must leave room for the diagonal entry even if it is zero.
2402 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2403            submatrix (same value is used for all local rows).
2404 -  o_nnz - array containing the number of nonzeros in the various rows of the
2405            OFF-DIAGONAL portion of the local submatrix (possibly different for
2406            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2407            structure. The size of this array is equal to the number
2408            of local rows, i.e., 'm'.
2409 
2410    Output Parameter:
2411 .  A - the matrix
2412 
2413    Notes:
2414    If the *_nnz parameter is given then the *_nz parameter is ignored
2415 
2416    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2417    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2418    storage requirements for this matrix.
2419 
2420    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2421    processor then it must be used on all processors that share the object for
2422    that argument.
2423 
2424    The user MUST specify either the local or global matrix dimensions
2425    (possibly both).
2426 
2427    The parallel matrix is partitioned such that the first m0 rows belong to
2428    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2429    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2430 
2431    The DIAGONAL portion of the local submatrix of a processor can be defined
2432    as the submatrix which is obtained by extracting the part corresponding
2433    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2434    first row that belongs to the processor, and r2 is the last row belonging
2435    to this processor. This is a square mxm matrix. The remaining portion
2436    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2437 
2438    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2439 
2440    When calling this routine with a single process communicator, a matrix of
2441    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2442    type of communicator, use the construction mechanism:
2443      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2444 
2445    By default, this format uses inodes (identical nodes) when possible.
2446    We search for consecutive rows with the same nonzero structure, thereby
2447    reusing matrix information to achieve increased efficiency.
2448 
2449    Options Database Keys:
2450 +  -mat_no_inode  - Do not use inodes
2451 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2452 -  -mat_aij_oneindex - Internally use indexing starting at 1
2453         rather than 0.  Note that when calling MatSetValues(),
2454         the user still MUST index entries starting at 0!
2455 
2456 
2457    Example usage:
2458 
2459    Consider the following 8x8 matrix with 34 non-zero values, that is
2460    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2461    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2462    as follows:
2463 
2464 .vb
2465             1  2  0  |  0  3  0  |  0  4
2466     Proc0   0  5  6  |  7  0  0  |  8  0
2467             9  0 10  | 11  0  0  | 12  0
2468     -------------------------------------
2469            13  0 14  | 15 16 17  |  0  0
2470     Proc1   0 18  0  | 19 20 21  |  0  0
2471             0  0  0  | 22 23  0  | 24  0
2472     -------------------------------------
2473     Proc2  25 26 27  |  0  0 28  | 29  0
2474            30  0  0  | 31 32 33  |  0 34
2475 .ve
2476 
2477    This can be represented as a collection of submatrices as:
2478 
2479 .vb
2480       A B C
2481       D E F
2482       G H I
2483 .ve
2484 
2485    Where the submatrices A,B,C are owned by proc0, D,E,F are
2486    owned by proc1, G,H,I are owned by proc2.
2487 
2488    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2489    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2490    The 'M','N' parameters are 8,8, and have the same values on all procs.
2491 
2492    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2493    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2494    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2495    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2496    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2497    matrix, and [DF] as another SeqAIJ matrix.
2498 
2499    When d_nz, o_nz parameters are specified, d_nz storage elements are
2500    allocated for every row of the local diagonal submatrix, and o_nz
2501    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2502    One way to choose d_nz and o_nz is to use the max nonzeros per local
2503    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2504    In this case, the values of d_nz,o_nz are:
2505 .vb
2506      proc0 : dnz = 2, o_nz = 2
2507      proc1 : dnz = 3, o_nz = 2
2508      proc2 : dnz = 1, o_nz = 4
2509 .ve
2510    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2511    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2512    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2513    34 values.
2514 
2515    When d_nnz, o_nnz parameters are specified, the storage is specified
2516    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2517    In the above case the values for d_nnz,o_nnz are:
2518 .vb
2519      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2520      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2521      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2522 .ve
2523    Here the space allocated is the sum of all the above values, i.e., 34, and
2524    hence pre-allocation is perfect.
2525 
2526    Level: intermediate
2527 
2528 .keywords: matrix, aij, compressed row, sparse, parallel
2529 
2530 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2531           MPIAIJ
2532 @*/
2533 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2534 {
2535   PetscErrorCode ierr;
2536   PetscMPIInt    size;
2537 
2538   PetscFunctionBegin;
2539   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2540   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2541   if (size > 1) {
2542     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2543     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2544   } else {
2545     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2546     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2547   }
2548   PetscFunctionReturn(0);
2549 }
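
/*
   A minimal usage sketch (not part of the library source): the same preallocation as in
   the manual page above done in a single call.  On a one-process communicator this
   returns a SEQAIJ matrix, as noted above; the values shown are illustrative only.

      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
*/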
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2553 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2554 {
2555   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2556 
2557   PetscFunctionBegin;
2558   *Ad     = a->A;
2559   *Ao     = a->B;
2560   *colmap = a->garray;
2561   PetscFunctionReturn(0);
2562 }
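
/*
   A minimal usage sketch (not part of the library source): access the local diagonal
   block, the off-diagonal block, and the local-to-global column map (garray) of an
   assembled MPIAIJ matrix A without copying any data.  The global column number of
   local off-diagonal column j is colmap[j].

      Mat      Ad,Ao;
      PetscInt *colmap;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/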
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2566 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2567 {
2568   PetscErrorCode ierr;
2569   PetscInt       i;
2570   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2571 
2572   PetscFunctionBegin;
2573   if (coloring->ctype == IS_COLORING_LOCAL) {
2574     ISColoringValue *allcolors,*colors;
2575     ISColoring      ocoloring;
2576 
2577     /* set coloring for diagonal portion */
2578     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2579 
2580     /* set coloring for off-diagonal portion */
2581     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2582     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2583     for (i=0; i<a->B->n; i++) {
2584       colors[i] = allcolors[a->garray[i]];
2585     }
2586     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2587     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2588     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2589     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2590   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2591     ISColoringValue *colors;
2592     PetscInt             *larray;
2593     ISColoring      ocoloring;
2594 
2595     /* set coloring for diagonal portion */
2596     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2597     for (i=0; i<a->A->n; i++) {
2598       larray[i] = i + a->cstart;
2599     }
2600     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2601     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2602     for (i=0; i<a->A->n; i++) {
2603       colors[i] = coloring->colors[larray[i]];
2604     }
2605     ierr = PetscFree(larray);CHKERRQ(ierr);
2606     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2607     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2608     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2609 
2610     /* set coloring for off-diagonal portion */
2611     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2612     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2613     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2614     for (i=0; i<a->B->n; i++) {
2615       colors[i] = coloring->colors[larray[i]];
2616     }
2617     ierr = PetscFree(larray);CHKERRQ(ierr);
2618     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2619     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2620     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2621   } else {
2622     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2623   }
2624 
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #if defined(PETSC_HAVE_ADIC)
2629 #undef __FUNCT__
2630 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2631 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2632 {
2633   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2634   PetscErrorCode ierr;
2635 
2636   PetscFunctionBegin;
2637   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2638   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2639   PetscFunctionReturn(0);
2640 }
2641 #endif
2642 
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2645 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2646 {
2647   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2648   PetscErrorCode ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2652   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatMerge"
2658 /*@C
2659       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2660                  matrices from each processor
2661 
2662     Collective on MPI_Comm
2663 
2664    Input Parameters:
2665 +    comm - the communicator the parallel matrix will live on
2666 .    inmat - the input sequential matrix on each processor
2667 .    n - number of local columns (or PETSC_DECIDE)
2668 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2669 
2670    Output Parameter:
2671 .    outmat - the parallel matrix generated
2672 
2673     Level: advanced
2674 
2675    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2676 
2677 @*/
2678 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2679 {
2680   PetscErrorCode ierr;
2681   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2682   PetscInt       *indx;
2683   PetscScalar    *values;
2684   PetscMap       columnmap,rowmap;
2685 
2686   PetscFunctionBegin;
2687     ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2688   /*
2689   PetscMPIInt       rank;
2690   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2691   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2692   */
2693   if (scall == MAT_INITIAL_MATRIX){
2694     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2695     if (n == PETSC_DECIDE){
2696       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2697       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2698       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2699       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2700       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2701     }
2702 
2703     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2704     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2705     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2706     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2707     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2708 
2709     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2710     for (i=0;i<m;i++) {
2711       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2712       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2713       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2714     }
2715     /* This routine will ONLY return an MPIAIJ type matrix */
2716     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2717     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2718     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2719     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2720 
2721   } else if (scall == MAT_REUSE_MATRIX){
2722     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2723   } else {
2724     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2725   }
2726 
2727   for (i=0;i<m;i++) {
2728     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2729     I    = i + rstart;
2730     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2731     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2732   }
2733   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2734   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2735   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2736 
2737   PetscFunctionReturn(0);
2738 }
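
/*
   A minimal usage sketch (not part of the library source): each process builds its own
   sequential matrix and MatMerge() stacks them into one parallel MPIAIJ matrix.  Note
   from the implementation above that MatMerge() destroys the input matrix.  The local
   sizes mlocal, N and the preallocation are illustrative assumptions.

      Mat localmat,globalmat;

      ierr = MatCreate(PETSC_COMM_SELF,mlocal,N,mlocal,N,&localmat);CHKERRQ(ierr);
      ierr = MatSetType(localmat,MATSEQAIJ);CHKERRQ(ierr);
      ierr = MatSeqAIJSetPreallocation(localmat,0,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues() on localmat, then assemble it ...
      ierr = MatMerge(comm,localmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&globalmat);CHKERRQ(ierr);
*/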
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatFileSplit"
2742 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2743 {
2744   PetscErrorCode    ierr;
2745   PetscMPIInt       rank;
2746   PetscInt          m,N,i,rstart,nnz;
2747   size_t            len;
2748   const PetscInt    *indx;
2749   PetscViewer       out;
2750   char              *name;
2751   Mat               B;
2752   const PetscScalar *values;
2753 
2754   PetscFunctionBegin;
2755   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2756   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2757   /* Should this be the type of the diagonal block of A? */
2758   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2759   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2760   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2761   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2762   for (i=0;i<m;i++) {
2763     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2764     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2765     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2766   }
2767   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2769 
2770   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2771   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2772   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2773   sprintf(name,"%s.%d",outfile,rank);
2774   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2775   ierr = PetscFree(name);CHKERRQ(ierr);
2776   ierr = MatView(B,out);CHKERRQ(ierr);
2777   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2778   ierr = MatDestroy(B);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
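
/*
   Illustrative use of MatFileSplit() -- a sketch only; the matrix A and the base
   file name are assumptions for the example, not taken from this file:

     Mat A;                                           parallel matrix, already assembled
     ierr = MatFileSplit(A,"splitmat");CHKERRQ(ierr);

   Each process <rank> then holds its own local rows in the binary file
   "splitmat.<rank>", which can be read back independently on a single process.
*/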
2781 
2782 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2785 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2786 {
2787   PetscErrorCode       ierr;
2788   Mat_Merge_SeqsToMPI  *merge;
2789   PetscObjectContainer container;
2790 
2791   PetscFunctionBegin;
2792   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2793   if (container) {
2794     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2795     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2796     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2797     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2798     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2799     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2800     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2801     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2802     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2803     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2804     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2805     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2806 
2807     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2808     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2809     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
2810   }
2811 
2812   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2813   PetscFunctionReturn(0);
2814 }
2815 
2816 #include "src/mat/utils/freespace.h"
2817 #include "petscbt.h"
2818 static PetscEvent logkey_seqstompinum = 0;
2819 #undef __FUNCT__
2820 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2821 /*@C
2822       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by summing the sequential
2823                  matrices contributed by each processor
2824 
2825     Collective on MPI_Comm
2826 
2827    Input Parameters:
2828 +    comm - the communicator the parallel matrix will live on
2829 .    seqmat - the input sequential matrix (one per processor)
2830 .    m - number of local rows (or PETSC_DECIDE)
2831 .    n - number of local columns (or PETSC_DECIDE)
2832 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2833 
2834    Output Parameter:
2835 .    mpimat - the parallel matrix generated
2836 
2837     Level: advanced
2838 
2839    Notes:
2840      The dimensions of the sequential matrix MUST be the same on every processor. The input seqmat
2841      is included in the container "Mat_Merge_SeqsToMPI" and will be destroyed when mpimat is destroyed;
2842      call PetscObjectQuery() to access seqmat. A usage sketch follows MatMerge_SeqsToMPI() below.
2843 @*/
2844 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2845 {
2846   PetscErrorCode       ierr;
2847   MPI_Comm             comm=mpimat->comm;
2848   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2849   PetscMPIInt          size,rank,taga,*len_s;
2850   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2851   PetscInt             proc,m;
2852   PetscInt             **buf_ri,**buf_rj;
2853   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2854   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2855   MPI_Request          *s_waits,*r_waits;
2856   MPI_Status           *status;
2857   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2858   Mat_Merge_SeqsToMPI  *merge;
2859   PetscObjectContainer container;
2860 
2861   PetscFunctionBegin;
2862   if (!logkey_seqstompinum) {
2863     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2864   }
2865   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2866 
2867   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2868   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2869 
2870   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2871   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have the MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2872   ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2874   bi     = merge->bi;
2875   bj     = merge->bj;
2876   buf_ri = merge->buf_ri;
2877   buf_rj = merge->buf_rj;
2878 
2879   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2880   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2881   len_s  = merge->len_s;
2882 
2883   /* send and recv matrix values */
2884   /*-----------------------------*/
2885   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2886   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2887 
2888   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2889   for (proc=0,k=0; proc<size; proc++){
2890     if (!len_s[proc]) continue;
2891     i = owners[proc];
2892     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2893     k++;
2894   }
2895 
2896   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2897   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2898   ierr = PetscFree(status);CHKERRQ(ierr);
2899 
2900   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2901   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2902 
2903   /* insert mat values of mpimat */
2904   /*----------------------------*/
2905   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2906   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2907   nextrow = buf_ri_k + merge->nrecv;
2908   nextai  = nextrow + merge->nrecv;
2909 
2910   for (k=0; k<merge->nrecv; k++){
2911     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2912     nrows = *(buf_ri_k[k]);
2913     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2914     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points into the i-structure of the k-th recved message */
2915   }
2916 
2917   /* set values of ba */
2918   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2919   for (i=0; i<m; i++) {
2920     arow = owners[rank] + i;
2921     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2922     bnzi = bi[i+1] - bi[i];
2923     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2924 
2925     /* add local non-zero vals of this proc's seqmat into ba */
2926     anzi = ai[arow+1] - ai[arow];
2927     aj   = a->j + ai[arow];
2928     aa   = a->a + ai[arow];
2929     nextaj = 0;
2930     for (j=0; nextaj<anzi; j++){
2931       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2932         ba_i[j] += aa[nextaj++];
2933       }
2934     }
2935 
2936     /* add received vals into ba */
2937     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2938       /* i-th row */
2939       if (i == *nextrow[k]) {
2940         anzi = *(nextai[k]+1) - *nextai[k];
2941         aj   = buf_rj[k] + *(nextai[k]);
2942         aa   = abuf_r[k] + *(nextai[k]);
2943         nextaj = 0;
2944         for (j=0; nextaj<anzi; j++){
2945           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2946             ba_i[j] += aa[nextaj++];
2947           }
2948         }
2949         nextrow[k]++; nextai[k]++;
2950       }
2951     }
2952     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2953   }
2954   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2956 
2957   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2958   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2959   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2960   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2961   PetscFunctionReturn(0);
2962 }
2963 
2964 static PetscEvent logkey_seqstompisym = 0;
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2967 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2968 {
2969   PetscErrorCode       ierr;
2970   Mat                  B_mpi;
2971   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2972   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2973   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2974   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2975   PetscInt             len,proc,*dnz,*onz;
2976   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2977   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2978   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
2979   MPI_Status           *status;
2980   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
2981   PetscBT              lnkbt;
2982   Mat_Merge_SeqsToMPI  *merge;
2983   PetscObjectContainer container;
2984 
2985   PetscFunctionBegin;
2986   if (!logkey_seqstompisym) {
2987     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
2988   }
2989   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
2990 
2991   /* make sure it is a PETSc comm */
2992   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
2993   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2994   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2995 
2996   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
2997   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2998 
2999   /* determine row ownership */
3000   /*---------------------------------------------------------*/
3001   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3002   if (m == PETSC_DECIDE) {
3003     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3004   } else {
3005     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3006   }
3007   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3008   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3009   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3010 
3011   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3012   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3013 
3014   /* determine the number of messages to send, their lengths */
3015   /*---------------------------------------------------------*/
3016   len_s  = merge->len_s;
3017 
3018   len = 0;  /* length of buf_si[] */
3019   merge->nsend = 0;
3020   for (proc=0; proc<size; proc++){
3021     len_si[proc] = 0;
3022     if (proc == rank){
3023       len_s[proc] = 0;
3024     } else {
3025       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3026       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3027     }
3028     if (len_s[proc]) {
3029       merge->nsend++;
3030       nrows = 0;
3031       for (i=owners[proc]; i<owners[proc+1]; i++){
3032         if (ai[i+1] > ai[i]) nrows++;
3033       }
3034       len_si[proc] = 2*(nrows+1);
3035       len += len_si[proc];
3036     }
3037   }
3038 
3039   /* determine the number and length of messages to receive for ij-structure */
3040   /*-------------------------------------------------------------------------*/
3041   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3042   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3043 
3044   /* post the Irecv of j-structure */
3045   /*-------------------------------*/
3046   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3047   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3048 
3049   /* post the Isend of j-structure */
3050   /*--------------------------------*/
3051   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3052   sj_waits = si_waits + merge->nsend;
3053 
3054   for (proc=0, k=0; proc<size; proc++){
3055     if (!len_s[proc]) continue;
3056     i = owners[proc];
3057     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3058     k++;
3059   }
3060 
3061   /* receives and sends of j-structure are complete */
3062   /*------------------------------------------------*/
3063   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3064   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3065 
3066   /* send and recv i-structure */
3067   /*---------------------------*/
3068   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3069   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3070 
3071   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3072   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3073   for (proc=0,k=0; proc<size; proc++){
3074     if (!len_s[proc]) continue;
3075     /* form outgoing message for i-structure:
3076          buf_si[0]:                 nrows to be sent
3077                [1:nrows]:           row index (global)
3078                [nrows+1:2*nrows+1]: i-structure index
3079     */
3080     /*-------------------------------------------*/
3081     nrows = len_si[proc]/2 - 1;
3082     buf_si_i    = buf_si + nrows+1;
3083     buf_si[0]   = nrows;
3084     buf_si_i[0] = 0;
3085     nrows = 0;
3086     for (i=owners[proc]; i<owners[proc+1]; i++){
3087       anzi = ai[i+1] - ai[i];
3088       if (anzi) {
3089         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3090         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3091         nrows++;
3092       }
3093     }
3094     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3095     k++;
3096     buf_si += len_si[proc];
3097   }
3098 
3099   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3100   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3101 
3102   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3103   for (i=0; i<merge->nrecv; i++){
3104     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3105   }
3106 
3107   ierr = PetscFree(len_si);CHKERRQ(ierr);
3108   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3109   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3110   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3111   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3112   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3113   ierr = PetscFree(status);CHKERRQ(ierr);
3114 
3115   /* compute a local seq matrix in each processor */
3116   /*----------------------------------------------*/
3117   /* allocate bi array and free space for accumulating nonzero column info */
3118   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3119   bi[0] = 0;
3120 
3121   /* create and initialize a linked list */
3122   nlnk = N+1;
3123   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3124 
3125   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3127   len  = ai[owners[rank+1]] - ai[owners[rank]];
3128   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3129   current_space = free_space;
3130 
3131   /* determine symbolic info for each local row */
3132   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3133   nextrow = buf_ri_k + merge->nrecv;
3134   nextai  = nextrow + merge->nrecv;
3135   for (k=0; k<merge->nrecv; k++){
3136     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3137     nrows = *buf_ri_k[k];
3138     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3139     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points into the i-structure of the k-th recved message */
3140   }
3141 
3142   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3143   len = 0;
3144   for (i=0;i<m;i++) {
3145     bnzi   = 0;
3146     /* add local non-zero cols of this proc's seqmat into lnk */
3147     arow   = owners[rank] + i;
3148     anzi   = ai[arow+1] - ai[arow];
3149     aj     = a->j + ai[arow];
3150     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3151     bnzi += nlnk;
3152     /* add received col data into lnk */
3153     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3154       if (i == *nextrow[k]) { /* i-th row */
3155         anzi = *(nextai[k]+1) - *nextai[k];
3156         aj   = buf_rj[k] + *nextai[k];
3157         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3158         bnzi += nlnk;
3159         nextrow[k]++; nextai[k]++;
3160       }
3161     }
3162     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3163 
3164     /* if free space is not available, make more free space */
3165     if (current_space->local_remaining<bnzi) {
3166       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3167       nspacedouble++;
3168     }
3169     /* copy data into free space, then initialize lnk */
3170     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3171     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3172 
3173     current_space->array           += bnzi;
3174     current_space->local_used      += bnzi;
3175     current_space->local_remaining -= bnzi;
3176 
3177     bi[i+1] = bi[i] + bnzi;
3178   }
3179 
3180   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3181 
3182   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3183   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3184   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3185 
3186   /* create symbolic parallel matrix B_mpi */
3187   /*---------------------------------------*/
3188   if (n==PETSC_DECIDE) {
3189     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3190   } else {
3191     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3192   }
3193   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3194   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3195   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3196 
3197   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3198   B_mpi->assembled     = PETSC_FALSE;
3199   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3200   merge->bi            = bi;
3201   merge->bj            = bj;
3202   merge->buf_ri        = buf_ri;
3203   merge->buf_rj        = buf_rj;
3204   merge->coi           = PETSC_NULL;
3205   merge->coj           = PETSC_NULL;
3206   merge->owners_co     = PETSC_NULL;
3207 
3208   /* attach the supporting struct to B_mpi for reuse */
3209   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3210   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3211   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3212   *mpimat = B_mpi;
3213 
3214   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3215   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3216   PetscFunctionReturn(0);
3217 }
3218 
3219 static PetscEvent logkey_seqstompi = 0;
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatMerge_SeqsToMPI"
3222 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3223 {
3224   PetscErrorCode   ierr;
3225 
3226   PetscFunctionBegin;
3227   if (!logkey_seqstompi) {
3228     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3229   }
3230   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3231   if (scall == MAT_INITIAL_MATRIX){
3232     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3233   }
3234   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3235   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3236   PetscFunctionReturn(0);
3237 }
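
/*
   Illustrative calling sequence for MatMerge_SeqsToMPI() -- a sketch only; the
   variable names and the way each process builds seqmat are assumptions:

     Mat seqmat,mpimat;

     ... every process creates its own SeqAIJ matrix seqmat, all of the same global size ...

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);

     ... change the numerical values (but not the nonzero pattern) of seqmat ...

     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,
                               MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);

   MAT_INITIAL_MATRIX runs the symbolic phase (MatMerge_SeqsToMPISymbolic()) followed by
   the numeric phase; MAT_REUSE_MATRIX repeats only the numeric phase.
*/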
3238 static PetscEvent logkey_getlocalmat = 0;
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatGetLocalMat"
3241 /*@C
3242      MatGetLocalMat - Creates a SeqAIJ matrix that contains all the local rows of a parallel MPIAIJ matrix
3243 
3244     Not Collective
3245 
3246    Input Parameters:
3247 +    A - the matrix
3248 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3249 
3250    Output Parameter:
3251 .    A_loc - the local sequential matrix generated
3252 
3253     Level: developer
3254 
3255 @*/
3256 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3257 {
3258   PetscErrorCode  ierr;
3259   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3260   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3261   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3262   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3263   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3264   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3265 
3266   PetscFunctionBegin;
3267   if (!logkey_getlocalmat) {
3268     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3269   }
3270   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3271   if (scall == MAT_INITIAL_MATRIX){
3272     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3273     ci[0] = 0;
3274     for (i=0; i<am; i++){
3275       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3276     }
3277     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3278     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3279     k = 0;
3280     for (i=0; i<am; i++) {
3281       ncols_o = bi[i+1] - bi[i];
3282       ncols_d = ai[i+1] - ai[i];
3283       /* off-diagonal portion of A */
3284       for (jo=0; jo<ncols_o; jo++) {
3285         col = cmap[*bj];
3286         if (col >= cstart) break;
3287         cj[k]   = col; bj++;
3288         ca[k++] = *ba++;
3289       }
3290       /* diagonal portion of A */
3291       for (j=0; j<ncols_d; j++) {
3292         cj[k]   = cstart + *aj++;
3293         ca[k++] = *aa++;
3294       }
3295       /* off-diagonal portion of A */
3296       for (j=jo; j<ncols_o; j++) {
3297         cj[k]   = cmap[*bj++];
3298         ca[k++] = *ba++;
3299       }
3300     }
3301     /* put together the new matrix */
3302     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3303     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3304     /* Since these are PETSc arrays, change flags to free them as necessary. */
3305     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3306     mat->freedata = PETSC_TRUE;
3307     mat->nonew    = 0;
3308   } else if (scall == MAT_REUSE_MATRIX){
3309     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3310     ci = mat->i; cj = mat->j; ca = mat->a;
3311     for (i=0; i<am; i++) {
3312       /* off-diagonal portion of A */
3313       ncols_o = bi[i+1] - bi[i];
3314       for (jo=0; jo<ncols_o; jo++) {
3315         col = cmap[*bj];
3316         if (col >= cstart) break;
3317         *ca++ = *ba++; bj++;
3318       }
3319       /* diagonal portion of A */
3320       ncols_d = ai[i+1] - ai[i];
3321       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3322       /* off-diagonal portion of A */
3323       for (j=jo; j<ncols_o; j++) {
3324         *ca++ = *ba++; bj++;
3325       }
3326     }
3327   } else {
3328     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3329   }
3330 
3331   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3332   PetscFunctionReturn(0);
3333 }
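
/*
   Illustrative use of MatGetLocalMat() -- a sketch; A and A_loc are assumed names:

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     ... work with the SeqAIJ matrix A_loc (A->m local rows by A->N global columns) ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);    refresh after A's values change
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/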
3334 
3335 static PetscEvent logkey_getlocalmatcondensed = 0;
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatGetLocalMatCondensed"
3338 /*@C
3339      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from the local rows of A, keeping only its NON-ZERO columns
3340 
3341     Not Collective
3342 
3343    Input Parameters:
3344 +    A - the matrix
3345 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3346 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3347 
3348    Output Parameter:
3349 .    A_loc - the local sequential matrix generated
3350 
3351     Level: developer
3352 
3353 @*/
3354 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3355 {
3356   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3357   PetscErrorCode    ierr;
3358   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3359   IS                isrowa,iscola;
3360   Mat               *aloc;
3361 
3362   PetscFunctionBegin;
3363   if (!logkey_getlocalmatcondensed) {
3364     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3365   }
3366   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3367   if (!row){
3368     start = a->rstart; end = a->rend;
3369     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3370   } else {
3371     isrowa = *row;
3372   }
3373   if (!col){
3374     start = a->cstart;
3375     cmap  = a->garray;
3376     nzA   = a->A->n;
3377     nzB   = a->B->n;
3378     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3379     ncols = 0;
3380     for (i=0; i<nzB; i++) {
3381       if (cmap[i] < start) idx[ncols++] = cmap[i];
3382       else break;
3383     }
3384     imark = i;
3385     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3386     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3387     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3388     ierr = PetscFree(idx);CHKERRQ(ierr);
3389   } else {
3390     iscola = *col;
3391   }
3392   if (scall != MAT_INITIAL_MATRIX){
3393     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3394     aloc[0] = *A_loc;
3395   }
3396   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3397   *A_loc = aloc[0];
3398   ierr = PetscFree(aloc);CHKERRQ(ierr);
3399   if (!row){
3400     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3401   }
3402   if (!col){
3403     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3404   }
3405   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3406   PetscFunctionReturn(0);
3407 }
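
/*
   Illustrative use of MatGetLocalMatCondensed() -- a sketch; A and A_loc are assumed names:

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     ... A_loc holds the local rows of A restricted to the columns that actually
         contain nonzeros in the local part of A ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/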
3408 
3409 static PetscEvent logkey_GetBrowsOfAcols = 0;
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatGetBrowsOfAcols"
3412 /*@C
3413     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3414 
3415     Collective on Mat
3416 
3417    Input Parameters:
3418 +    A,B - the matrices in mpiaij format
3419 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3420 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3421 
3422    Output Parameter:
3423 +    rowb, colb - index sets of rows and columns of B to extract
3424 .    brstart - row index in B_seq at which B's own local rows (the next B->m rows) begin
3425 -    B_seq - the sequential matrix generated
3426 
3427     Level: developer
3428 
3429 @*/
3430 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3431 {
3432   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3433   PetscErrorCode    ierr;
3434   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3435   IS                isrowb,iscolb;
3436   Mat               *bseq;
3437 
3438   PetscFunctionBegin;
3439   if (a->cstart != b->rstart || a->cend != b->rend){
3440     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3441   }
3442   if (!logkey_GetBrowsOfAcols) {
3443     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3444   }
3445   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3446 
3447   if (scall == MAT_INITIAL_MATRIX){
3448     start = a->cstart;
3449     cmap  = a->garray;
3450     nzA   = a->A->n;
3451     nzB   = a->B->n;
3452     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3453     ncols = 0;
3454     for (i=0; i<nzB; i++) {  /* row < local row index */
3455       if (cmap[i] < start) idx[ncols++] = cmap[i];
3456       else break;
3457     }
3458     imark = i;
3459     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3460     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3461     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3462     ierr = PetscFree(idx);CHKERRQ(ierr);
3463     *brstart = imark;
3464     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3465   } else {
3466     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3467     isrowb = *rowb; iscolb = *colb;
3468     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3469     bseq[0] = *B_seq;
3470   }
3471   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3472   *B_seq = bseq[0];
3473   ierr = PetscFree(bseq);CHKERRQ(ierr);
3474   if (!rowb){
3475     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3476   } else {
3477     *rowb = isrowb;
3478   }
3479   if (!colb){
3480     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3481   } else {
3482     *colb = iscolb;
3483   }
3484   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3485   PetscFunctionReturn(0);
3486 }
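
/*
   Illustrative use of MatGetBrowsOfAcols() -- a sketch; the variable names are assumptions:

     Mat      B_seq;
     IS       rowb,colb;
     PetscInt brstart;

     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ... use B_seq: the rows of B matching the nonzero columns of the local part of A ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);

     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/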
3487 
3488 static PetscEvent logkey_GetBrowsOfAocols = 0;
3489 #undef __FUNCT__
3490 #define __FUNCT__ "MatGetBrowsOfAoCols"
3491 /*@C
3492     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3493     of the OFF-DIAGONAL portion of the local part of A
3494 
3495     Collective on Mat
3496 
3497    Input Parameters:
3498 +    A,B - the matrices in mpiaij format
3499 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3500 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3501 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3502 
3503    Output Parameter:
3504 .    B_oth - the sequential matrix generated
3505 
3506     Level: developer
3507 
3508 @*/
3509 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3510 {
3511   VecScatter_MPI_General *gen_to,*gen_from;
3512   PetscErrorCode         ierr;
3513   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3514   Mat_SeqAIJ             *b_oth;
3515   VecScatter             ctx=a->Mvctx;
3516   MPI_Comm               comm=ctx->comm;
3517   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3518   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3519   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3520   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3521   MPI_Request            *rwaits,*swaits;
3522   MPI_Status             *sstatus,rstatus;
3523   PetscInt               *cols;
3524   PetscScalar            *vals;
3525   PetscMPIInt            j;
3526 
3527   PetscFunctionBegin;
3528   if (a->cstart != b->rstart || a->cend != b->rend){
3529     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3530   }
3531   if (!logkey_GetBrowsOfAocols) {
3532     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3533   }
3534   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3536 
3537   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3538   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3539   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3540   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3541   nrecvs   = gen_from->n;
3542   nsends   = gen_to->n;
3543   rwaits   = gen_from->requests;
3544   swaits   = gen_to->requests;
3545   srow     = gen_to->indices;   /* local row index to be sent */
3546   rstarts  = gen_from->starts;
3547   sstarts  = gen_to->starts;
3548   rprocs   = gen_from->procs;
3549   sprocs   = gen_to->procs;
3550   sstatus  = gen_to->sstatus;
3551 
3552   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3553   if (scall == MAT_INITIAL_MATRIX){
3554     /* i-array */
3555     /*---------*/
3556     /*  post receives */
3557     for (i=0; i<nrecvs; i++){
3558       rowlen = (PetscInt*)rvalues + rstarts[i];
3559       nrows = rstarts[i+1]-rstarts[i];
3560       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3561     }
3562 
3563     /* pack the outgoing message */
3564     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3565     rstartsj = sstartsj + nsends +1;
3566     sstartsj[0] = 0;  rstartsj[0] = 0;
3567     len = 0; /* total length of j or a array to be sent */
3568     k = 0;
3569     for (i=0; i<nsends; i++){
3570       rowlen = (PetscInt*)svalues + sstarts[i];
3571       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3572       for (j=0; j<nrows; j++) {
3573         row = srow[k] + b->rowners[rank]; /* global row idx */
3574         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3575         len += rowlen[j];
3576         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3577         k++;
3578       }
3579       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3580       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3581     }
3582     /* recvs and sends of i-array are completed */
3583     i = nrecvs;
3584     while (i--) {
3585       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3586     }
3587     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3588     /* allocate buffers for sending j and a arrays */
3589     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3590     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3591 
3592     /* create i-array of B_oth */
3593     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3594     b_othi[0] = 0;
3595     len = 0; /* total length of j or a array to be received */
3596     k = 0;
3597     for (i=0; i<nrecvs; i++){
3598       rowlen = (PetscInt*)rvalues + rstarts[i];
3599       nrows = rstarts[i+1]-rstarts[i];
3600       for (j=0; j<nrows; j++) {
3601         b_othi[k+1] = b_othi[k] + rowlen[j];
3602         len += rowlen[j]; k++;
3603       }
3604       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3605     }
3606 
3607     /* allocate space for j and a arrays of B_oth */
3608     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3609     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3610 
3611     /* j-array */
3612     /*---------*/
3613     /*  post receives of j-array */
3614     for (i=0; i<nrecvs; i++){
3615       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3616       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3617     }
3618     k = 0;
3619     for (i=0; i<nsends; i++){
3620       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3621       bufJ = bufj+sstartsj[i];
3622       for (j=0; j<nrows; j++) {
3623         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3624         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3625         for (l=0; l<ncols; l++){
3626           *bufJ++ = cols[l];
3627         }
3628         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3629       }
3630       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3631     }
3632 
3633     /* recvs and sends of j-array are completed */
3634     i = nrecvs;
3635     while (i--) {
3636       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3637     }
3638     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3639   } else if (scall == MAT_REUSE_MATRIX){
3640     sstartsj = *startsj;
3641     rstartsj = sstartsj + nsends +1;
3642     bufa     = *bufa_ptr;
3643     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3644     b_otha   = b_oth->a;
3645   } else {
3646     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3647   }
3648 
3649   /* a-array */
3650   /*---------*/
3651   /*  post receives of a-array */
3652   for (i=0; i<nrecvs; i++){
3653     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3654     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3655   }
3656   k = 0;
3657   for (i=0; i<nsends; i++){
3658     nrows = sstarts[i+1]-sstarts[i];
3659     bufA = bufa+sstartsj[i];
3660     for (j=0; j<nrows; j++) {
3661       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3662       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3663       for (l=0; l<ncols; l++){
3664         *bufA++ = vals[l];
3665       }
3666       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3667 
3668     }
3669     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3670   }
3671   /* recvs and sends of a-array are completed */
3672   i = nrecvs;
3673   while (i--) {
3674     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3675   }
3676   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3677 
3678   if (scall == MAT_INITIAL_MATRIX){
3679     /* put together the new matrix */
3680     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3681 
3682     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3683     /* Since these are PETSc arrays, change flags to free them as necessary. */
3684     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3685     b_oth->freedata = PETSC_TRUE;
3686     b_oth->nonew    = 0;
3687 
3688     ierr = PetscFree(bufj);CHKERRQ(ierr);
3689     if (!startsj || !bufa_ptr){
3690       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3691       ierr = PetscFree(bufa);CHKERRQ(ierr);
3692     } else {
3693       *startsj  = sstartsj;
3694       *bufa_ptr = bufa;
3695     }
3696   }
3697   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3698 
3699   PetscFunctionReturn(0);
3700 }
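
/*
   Illustrative use of MatGetBrowsOfAoCols() -- a sketch; the variable names are assumptions:

     Mat         B_oth;
     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;

     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ... use B_oth: the rows of B matching the nonzero columns of the off-diagonal
         portion of the local part of A ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);

     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/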
3701 
3702 /*MC
3703    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3704 
3705    Options Database Keys:
3706 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3707 
3708   Level: beginner
3709 
3710 .seealso: MatCreateMPIAIJ
3711 M*/
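
/*
   A minimal creation sketch for a matrix of this type (the sizes M, N and the
   preallocation numbers are made up for the example):

     Mat A;
     ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     ... MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ...
*/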
3712 
3713 EXTERN_C_BEGIN
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatCreate_MPIAIJ"
3716 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3717 {
3718   Mat_MPIAIJ     *b;
3719   PetscErrorCode ierr;
3720   PetscInt       i;
3721   PetscMPIInt    size;
3722 
3723   PetscFunctionBegin;
3724   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3725 
3726   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3727   B->data         = (void*)b;
3728   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3729   B->factor       = 0;
3730   B->bs           = 1;
3731   B->assembled    = PETSC_FALSE;
3732   B->mapping      = 0;
3733 
3734   B->insertmode      = NOT_SET_VALUES;
3735   b->size            = size;
3736   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3737 
3738   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3739   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3740 
3741   /* the information in the maps duplicates the information computed below, eventually
3742      we should remove the duplicate information that is not contained in the maps */
3743   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3744   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3745 
3746   /* build local table of row and column ownerships */
3747   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3748   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3749   b->cowners = b->rowners + b->size + 2;
3750   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3751   b->rowners[0] = 0;
3752   for (i=2; i<=b->size; i++) {
3753     b->rowners[i] += b->rowners[i-1];
3754   }
3755   b->rstart = b->rowners[b->rank];
3756   b->rend   = b->rowners[b->rank+1];
3757   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3758   b->cowners[0] = 0;
3759   for (i=2; i<=b->size; i++) {
3760     b->cowners[i] += b->cowners[i-1];
3761   }
3762   b->cstart = b->cowners[b->rank];
3763   b->cend   = b->cowners[b->rank+1];
3764 
3765   /* build cache for off array entries formed */
3766   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3767   b->donotstash  = PETSC_FALSE;
3768   b->colmap      = 0;
3769   b->garray      = 0;
3770   b->roworiented = PETSC_TRUE;
3771 
3772   /* stuff used for matrix vector multiply */
3773   b->lvec      = PETSC_NULL;
3774   b->Mvctx     = PETSC_NULL;
3775 
3776   /* stuff for MatGetRow() */
3777   b->rowindices   = 0;
3778   b->rowvalues    = 0;
3779   b->getrowactive = PETSC_FALSE;
3780 
3781   /* Explicitly create 2 MATSEQAIJ matrices. */
3782   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3783   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3784   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3785   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3786   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3787   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3788 
3789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3790                                      "MatStoreValues_MPIAIJ",
3791                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3792   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3793                                      "MatRetrieveValues_MPIAIJ",
3794                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3795   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3796 				     "MatGetDiagonalBlock_MPIAIJ",
3797                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3798   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3799 				     "MatIsTranspose_MPIAIJ",
3800 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3801   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3802 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3803 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3804   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3805 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3806 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3807   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3808 				     "MatDiagonalScaleLocal_MPIAIJ",
3809 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3810   PetscFunctionReturn(0);
3811 }
3812 EXTERN_C_END
3813