xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 354c94de58a434b7a4e66a8dd3b0b59d9623044d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 has an order-N integer array, but access is fast).
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
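/*
   A minimal lookup sketch (illustration only, not part of the library; gcol and
   lcol are hypothetical names): once the colmap has been built, a global column
   index gcol is translated to a local off-diagonal index exactly as
   MatSetValues_MPIAIJ() below does it.  The +1/-1 shift lets a lookup result of
   zero mean "not found".

      PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
      ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
      lcol--;
   #else
      lcol = aij->colmap[gcol] - 1;
   #endif
      if (lcol < 0) { ... gcol does not appear in the off-diagonal part ... }
*/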
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,rmax1,aa,ai,aj,am,rp1,ap1,aimax);\
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81       for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,rmax2,ba,bi,bj,bm,rp2,ap2,bimax);\
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
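/*
   Both macros above insert (col,value) into a row whose column indices are kept
   sorted: a short binary search narrows the range, a linear scan finds the slot,
   and later entries are shifted up by one.  A standalone sketch of the same
   technique with INSERT_VALUES semantics (hypothetical helper, not part of PETSc):

      static void InsertSorted(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,
                               PetscInt col,PetscScalar value)
      {
        PetscInt lo = 0,hi = *nrow,i,k;
        while (hi - lo > 5) {
          PetscInt t = (lo + hi)/2;
          if (cols[t] > col) hi = t; else lo = t;
        }
        for (i=lo; i<hi; i++) {
          if (cols[i] == col) { vals[i] = value; return; }
          if (cols[i] > col) break;
        }
        for (k=*nrow; k>i; k--) { cols[k] = cols[k-1]; vals[k] = vals[k-1]; }
        cols[i] = col; vals[i] = value; (*nrow)++;
      }
*/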
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some Variables required in the macro */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171             col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
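/*
   Typical calling sequence from user code (a sketch; assumes mat is a parallel
   AIJ matrix that has already been created and preallocated).  Rows owned by
   other processes are simply stashed here and communicated during assembly:

      PetscInt    row,cols[3];
      PetscScalar vals[3];
      ... fill row, cols, and vals ...
      ierr = MatSetValues(mat,1,&row,3,cols,vals,ADD_VALUES);CHKERRQ(ierr);
      ...
      ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/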
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col--;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so we must
318      also disassemble ourselves, in order that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change, then we know that
321      no processor disassembled and thus we can skip this stuff
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed by MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,N,*rows,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380   IS             istmp;
381 #if defined(PETSC_DEBUG)
382   PetscTruth     found = PETSC_FALSE;
383 #endif
384 
385   PetscFunctionBegin;
386   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
387   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
388 
389   /*  first count number of contributors to each processor */
390   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
391   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
392   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
393   j = 0;
394   for (i=0; i<N; i++) {
395     if (lastidx > (idx = rows[i])) j = 0;
396     lastidx = idx;
397     for (; j<size; j++) {
398       if (idx >= owners[j] && idx < owners[j+1]) {
399         nprocs[2*j]++;
400         nprocs[2*j+1] = 1;
401         owner[i] = j;
402 #if defined(PETSC_DEBUG)
403         found = PETSC_TRUE;
404 #endif
405         break;
406       }
407     }
408 #if defined(PETSC_DEBUG)
409     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
410     found = PETSC_FALSE;
411 #endif
412   }
413   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
414 
415   /* inform other processors of number of messages and max length*/
416   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
417 
418   /* post receives:   */
419   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
420   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
421   for (i=0; i<nrecvs; i++) {
422     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
423   }
424 
425   /* do sends:
426       1) starts[i] gives the starting index in svalues for stuff going to
427          the ith processor
428   */
429   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
430   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
431   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
432   starts[0] = 0;
433   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
434   for (i=0; i<N; i++) {
435     svalues[starts[owner[i]]++] = rows[i];
436   }
437   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
438 
439   starts[0] = 0;
440   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
441   count = 0;
442   for (i=0; i<size; i++) {
443     if (nprocs[2*i+1]) {
444       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
445     }
446   }
447   ierr = PetscFree(starts);CHKERRQ(ierr);
448 
449   base = owners[rank];
450 
451   /*  wait on receives */
452   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
453   source = lens + nrecvs;
454   count  = nrecvs; slen = 0;
455   while (count) {
456     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
457     /* unpack receives into our local space */
458     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
459     source[imdex]  = recv_status.MPI_SOURCE;
460     lens[imdex]    = n;
461     slen          += n;
462     count--;
463   }
464   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
465 
466   /* move the data into the send scatter */
467   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
468   count = 0;
469   for (i=0; i<nrecvs; i++) {
470     values = rvalues + i*nmax;
471     for (j=0; j<lens[i]; j++) {
472       lrows[count++] = values[j] - base;
473     }
474   }
475   ierr = PetscFree(rvalues);CHKERRQ(ierr);
476   ierr = PetscFree(lens);CHKERRQ(ierr);
477   ierr = PetscFree(owner);CHKERRQ(ierr);
478   ierr = PetscFree(nprocs);CHKERRQ(ierr);
479 
480   /* actually zap the local rows */
481   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
482   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
483 
484   /*
485         Zero the required rows. If the "diagonal block" of the matrix
486      is square and the user wishes to set the diagonal, we use separate
487      code so that MatSetValues() is not called for each diagonal entry,
488      which would allocate new memory, causing lots of mallocs and slowing things down.
489 
490        Contributed by: Matthew Knepley
491   */
492   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
493   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
494   if (diag && (l->A->M == l->A->N)) {
495     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
496   } else if (diag) {
497     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
498     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
499       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
500 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
501     }
502     for (i = 0; i < slen; i++) {
503       row  = lrows[i] + rstart;
504       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
505     }
506     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
507     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
508   } else {
509     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
510   }
511   ierr = ISDestroy(istmp);CHKERRQ(ierr);
512   ierr = PetscFree(lrows);CHKERRQ(ierr);
513 
514   /* wait on sends */
515   if (nsends) {
516     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
517     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
518     ierr = PetscFree(send_status);CHKERRQ(ierr);
519   }
520   ierr = PetscFree(send_waits);CHKERRQ(ierr);
521   ierr = PetscFree(svalues);CHKERRQ(ierr);
522 
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatMult_MPIAIJ"
528 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
529 {
530   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
531   PetscErrorCode ierr;
532   PetscInt       nt;
533 
534   PetscFunctionBegin;
535   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
536   if (nt != A->n) {
537     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
538   }
539   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
540   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
541   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
542   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
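/*
   MatMult_MPIAIJ() above computes, on each process,

        y = A_d * x_local + B_o * x_ghost

   where A_d (a->A) holds the columns owned by this process and B_o (a->B) the
   remaining columns.  The scatter that gathers the ghost values into a->lvec is
   started before the local product and finished just before the off-diagonal
   product, so communication overlaps the local computation.
*/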
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_MPIAIJ"
548 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
563 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscTruth     merged;
568 
569   PetscFunctionBegin;
570   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
571   /* do nondiagonal part */
572   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
573   if (!merged) {
574     /* send it on its way */
575     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
576     /* do local part */
577     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
578     /* receive remote parts: note this assumes the values are not actually */
579     /* added into yy until this VecScatterEnd() completes */
580     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
581   } else {
582     /* do local part */
583     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
584     /* send it on its way */
585     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
586     /* values actually were received in the Begin() but we need to call this nop */
587     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
588   }
589   PetscFunctionReturn(0);
590 }
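/*
   MatMultTranspose_MPIAIJ() above computes

        y = A_d^T * x  +  (reverse ADD_VALUES scatter of  B_o^T * x)

   The off-diagonal contribution is formed locally into a->lvec and then
   accumulated into yy on the owning processes; whether the local product is
   done before or after starting the scatter depends on whether the scatter's
   begin and end are merged.
*/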
591 
592 EXTERN_C_BEGIN
593 #undef __FUNCT__
594 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
595 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
596 {
597   MPI_Comm       comm;
598   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
599   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
600   IS             Me,Notme;
601   PetscErrorCode ierr;
602   PetscInt       M,N,first,last,*notme,i;
603   PetscMPIInt    size;
604 
605   PetscFunctionBegin;
606 
607   /* Easy test: symmetric diagonal block */
608   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
609   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
610   if (!*f) PetscFunctionReturn(0);
611   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
613   if (size == 1) PetscFunctionReturn(0);
614 
615   /* Hard test: off-diagonal block. This takes a MatGetSubMatrices() call. */
616   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
617   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
618   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
619   for (i=0; i<first; i++) notme[i] = i;
620   for (i=last; i<M; i++) notme[i-last+first] = i;
621   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
622   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
623   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
624   Aoff = Aoffs[0];
625   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
626   Boff = Boffs[0];
627   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
628   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
629   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
630   ierr = ISDestroy(Me);CHKERRQ(ierr);
631   ierr = ISDestroy(Notme);CHKERRQ(ierr);
632 
633   PetscFunctionReturn(0);
634 }
635 EXTERN_C_END
636 
637 #undef __FUNCT__
638 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
639 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
640 {
641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
642   PetscErrorCode ierr;
643 
644   PetscFunctionBegin;
645   /* do nondiagonal part */
646   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
647   /* send it on its way */
648   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649   /* do local part */
650   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
651   /* receive remote parts */
652   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657   This only works correctly for square matrices where the subblock A->A is the
658    diagonal block
659 */
660 #undef __FUNCT__
661 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
662 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
663 {
664   PetscErrorCode ierr;
665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
666 
667   PetscFunctionBegin;
668   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
669   if (a->rstart != a->cstart || a->rend != a->cend) {
670     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
671   }
672   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatScale_MPIAIJ"
678 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
679 {
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681   PetscErrorCode ierr;
682 
683   PetscFunctionBegin;
684   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
685   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatDestroy_MPIAIJ"
691 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
692 {
693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697 #if defined(PETSC_USE_LOG)
698   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
699 #endif
700   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
701   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
704 #if defined (PETSC_USE_CTABLE)
705   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
706 #else
707   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
708 #endif
709   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
710   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
711   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
712   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
713   ierr = PetscFree(aij);CHKERRQ(ierr);
714 
715   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
716   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
717   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
718   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
719   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
720   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
721   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_Binary"
727 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
731   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
732   PetscErrorCode    ierr;
733   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
734   int               fd;
735   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
736   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
737   PetscScalar       *column_values;
738 
739   PetscFunctionBegin;
740   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
741   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
742   nz   = A->nz + B->nz;
743   if (!rank) {
744     header[0] = MAT_FILE_COOKIE;
745     header[1] = mat->M;
746     header[2] = mat->N;
747     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
749     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
750     /* get largest number of rows any processor has */
751     rlen = mat->m;
752     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
753     for (i=1; i<size; i++) {
754       rlen = PetscMax(rlen,range[i+1] - range[i]);
755     }
756   } else {
757     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
758     rlen = mat->m;
759   }
760 
761   /* load up the local row counts */
762   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
763   for (i=0; i<mat->m; i++) {
764     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
765   }
766 
767   /* store the row lengths to the file */
768   if (!rank) {
769     MPI_Status status;
770     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
771     for (i=1; i<size; i++) {
772       rlen = range[i+1] - range[i];
773       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
774       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
775     }
776   } else {
777     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
778   }
779   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
780 
781   /* load up the local column indices */
782   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
783   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
784   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
785   cnt  = 0;
786   for (i=0; i<mat->m; i++) {
787     for (j=B->i[i]; j<B->i[i+1]; j++) {
788       if ( (col = garray[B->j[j]]) > cstart) break;
789       column_indices[cnt++] = col;
790     }
791     for (k=A->i[i]; k<A->i[i+1]; k++) {
792       column_indices[cnt++] = A->j[k] + cstart;
793     }
794     for (; j<B->i[i+1]; j++) {
795       column_indices[cnt++] = garray[B->j[j]];
796     }
797   }
798   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
799 
800   /* store the column indices to the file */
801   if (!rank) {
802     MPI_Status status;
803     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
806       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
807       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
808       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
809     }
810   } else {
811     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
812     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(column_indices);CHKERRQ(ierr);
815 
816   /* load up the local column values */
817   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
818   cnt  = 0;
819   for (i=0; i<mat->m; i++) {
820     for (j=B->i[i]; j<B->i[i+1]; j++) {
821       if ( garray[B->j[j]] > cstart) break;
822       column_values[cnt++] = B->a[j];
823     }
824     for (k=A->i[i]; k<A->i[i+1]; k++) {
825       column_values[cnt++] = A->a[k];
826     }
827     for (; j<B->i[i+1]; j++) {
828       column_values[cnt++] = B->a[j];
829     }
830   }
831   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
832 
833   /* store the column values to the file */
834   if (!rank) {
835     MPI_Status status;
836     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
837     for (i=1; i<size; i++) {
838       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
839       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
840       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
842     }
843   } else {
844     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
845     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(column_values);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
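/*
   Layout of the binary file written above (integers as PETSC_INT, values as
   PETSC_SCALAR, all writing done by process 0):

      MAT_FILE_COOKIE, M, N, total number of nonzeros     (4-entry header)
      row lengths for all M rows
      column indices of all nonzeros, row by row, each row ordered by global
        column (off-diagonal entries below cstart, then the diagonal block,
        then the remaining off-diagonal entries)
      numerical values in the same order as the column indices
*/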
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
853 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
854 {
855   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
856   PetscErrorCode    ierr;
857   PetscMPIInt       rank = aij->rank,size = aij->size;
858   PetscTruth        isdraw,iascii,isbinary;
859   PetscViewer       sviewer;
860   PetscViewerFormat format;
861 
862   PetscFunctionBegin;
863   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
864   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
865   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
866   if (iascii) {
867     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
868     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
869       MatInfo    info;
870       PetscTruth inodes;
871 
872       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
873       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
875       if (!inodes) {
876         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
877               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
878       } else {
879         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
880               rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
881       }
882       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
884       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
885       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
886       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
887       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_INFO) {
890       PetscInt   inodecount,inodelimit,*inodes;
891       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
892       if (inodes) {
893         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
896       }
897       PetscFunctionReturn(0);
898     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899       PetscFunctionReturn(0);
900     }
901   } else if (isbinary) {
902     if (size == 1) {
903       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
904       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
905     } else {
906       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
907     }
908     PetscFunctionReturn(0);
909   } else if (isdraw) {
910     PetscDraw  draw;
911     PetscTruth isnull;
912     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
913     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
914   }
915 
916   if (size == 1) {
917     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919   } else {
920     /* assemble the entire matrix onto first processor. */
921     Mat         A;
922     Mat_SeqAIJ  *Aloc;
923     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
924     PetscScalar *a;
925 
926     if (!rank) {
927       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
928     } else {
929       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
930     }
931     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
932     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
933     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
934     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
935 
936     /* copy over the A part */
937     Aloc = (Mat_SeqAIJ*)aij->A->data;
938     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
939     row = aij->rstart;
940     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
941     for (i=0; i<m; i++) {
942       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
943       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
944     }
945     aj = Aloc->j;
946     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
947 
948     /* copy over the B part */
949     Aloc = (Mat_SeqAIJ*)aij->B->data;
950     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
951     row  = aij->rstart;
952     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
953     ct   = cols;
954     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
955     for (i=0; i<m; i++) {
956       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
957       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
958     }
959     ierr = PetscFree(ct);CHKERRQ(ierr);
960     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
961     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962     /*
963        Everyone has to call to draw the matrix since the graphics waits are
964        synchronized across all processors that share the PetscDraw object
965     */
966     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
967     if (!rank) {
968       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
969       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
970     }
971     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
972     ierr = MatDestroy(A);CHKERRQ(ierr);
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatView_MPIAIJ"
979 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
980 {
981   PetscErrorCode ierr;
982   PetscTruth     iascii,isdraw,issocket,isbinary;
983 
984   PetscFunctionBegin;
985   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
986   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
987   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
988   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
989   if (iascii || isdraw || isbinary || issocket) {
990     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
991   } else {
992     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
993   }
994   PetscFunctionReturn(0);
995 }
996 
997 
998 
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatRelax_MPIAIJ"
1001 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1002 {
1003   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1004   PetscErrorCode ierr;
1005   Vec            bb1;
1006   PetscScalar    mone=-1.0;
1007 
1008   PetscFunctionBegin;
1009   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1010 
1011   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1012 
1013   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1014     if (flag & SOR_ZERO_INITIAL_GUESS) {
1015       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1016       its--;
1017     }
1018 
1019     while (its--) {
1020       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1021       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1022 
1023       /* update rhs: bb1 = bb - B*x */
1024       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1025       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1026 
1027       /* local sweep */
1028       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1029       CHKERRQ(ierr);
1030     }
1031   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1032     if (flag & SOR_ZERO_INITIAL_GUESS) {
1033       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1034       its--;
1035     }
1036     while (its--) {
1037       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1038       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039 
1040       /* update rhs: bb1 = bb - B*x */
1041       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1042       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1043 
1044       /* local sweep */
1045       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1046       CHKERRQ(ierr);
1047     }
1048   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1049     if (flag & SOR_ZERO_INITIAL_GUESS) {
1050       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1051       its--;
1052     }
1053     while (its--) {
1054       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1055       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056 
1057       /* update rhs: bb1 = bb - B*x */
1058       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
1059       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1060 
1061       /* local sweep */
1062       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1063       CHKERRQ(ierr);
1064     }
1065   } else {
1066     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1067   }
1068 
1069   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1070   PetscFunctionReturn(0);
1071 }
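/*
   Each outer iteration of the local relaxation above first refreshes the ghost
   values of xx, then forms the modified right-hand side

        bb1 = bb - B_o * x_ghost

   (the VecScale() by -1 followed by the multadd is exactly this), and finally
   runs the requested sweep using only the local diagonal block A_d; the
   off-process coupling enters solely through bb1.
*/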
1072 
1073 #undef __FUNCT__
1074 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1075 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1076 {
1077   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1078   Mat            A = mat->A,B = mat->B;
1079   PetscErrorCode ierr;
1080   PetscReal      isend[5],irecv[5];
1081 
1082   PetscFunctionBegin;
1083   info->block_size     = 1.0;
1084   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1085   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1086   isend[3] = info->memory;  isend[4] = info->mallocs;
1087   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1088   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1089   isend[3] += info->memory;  isend[4] += info->mallocs;
1090   if (flag == MAT_LOCAL) {
1091     info->nz_used      = isend[0];
1092     info->nz_allocated = isend[1];
1093     info->nz_unneeded  = isend[2];
1094     info->memory       = isend[3];
1095     info->mallocs      = isend[4];
1096   } else if (flag == MAT_GLOBAL_MAX) {
1097     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1098     info->nz_used      = irecv[0];
1099     info->nz_allocated = irecv[1];
1100     info->nz_unneeded  = irecv[2];
1101     info->memory       = irecv[3];
1102     info->mallocs      = irecv[4];
1103   } else if (flag == MAT_GLOBAL_SUM) {
1104     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1105     info->nz_used      = irecv[0];
1106     info->nz_allocated = irecv[1];
1107     info->nz_unneeded  = irecv[2];
1108     info->memory       = irecv[3];
1109     info->mallocs      = irecv[4];
1110   }
1111   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1112   info->fill_ratio_needed = 0;
1113   info->factor_mallocs    = 0;
1114   info->rows_global       = (double)matin->M;
1115   info->columns_global    = (double)matin->N;
1116   info->rows_local        = (double)matin->m;
1117   info->columns_local     = (double)matin->N;
1118 
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatSetOption_MPIAIJ"
1124 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1125 {
1126   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1127   PetscErrorCode ierr;
1128 
1129   PetscFunctionBegin;
1130   switch (op) {
1131   case MAT_NO_NEW_NONZERO_LOCATIONS:
1132   case MAT_YES_NEW_NONZERO_LOCATIONS:
1133   case MAT_COLUMNS_UNSORTED:
1134   case MAT_COLUMNS_SORTED:
1135   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1136   case MAT_KEEP_ZEROED_ROWS:
1137   case MAT_NEW_NONZERO_LOCATION_ERR:
1138   case MAT_USE_INODES:
1139   case MAT_DO_NOT_USE_INODES:
1140   case MAT_IGNORE_ZERO_ENTRIES:
1141     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1142     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1143     break;
1144   case MAT_ROW_ORIENTED:
1145     a->roworiented = PETSC_TRUE;
1146     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1147     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1148     break;
1149   case MAT_ROWS_SORTED:
1150   case MAT_ROWS_UNSORTED:
1151   case MAT_YES_NEW_DIAGONALS:
1152     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1153     break;
1154   case MAT_COLUMN_ORIENTED:
1155     a->roworiented = PETSC_FALSE;
1156     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1157     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1158     break;
1159   case MAT_IGNORE_OFF_PROC_ENTRIES:
1160     a->donotstash = PETSC_TRUE;
1161     break;
1162   case MAT_NO_NEW_DIAGONALS:
1163     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1164   case MAT_SYMMETRIC:
1165   case MAT_STRUCTURALLY_SYMMETRIC:
1166   case MAT_HERMITIAN:
1167   case MAT_SYMMETRY_ETERNAL:
1168     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1169     break;
1170   case MAT_NOT_SYMMETRIC:
1171   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1172   case MAT_NOT_HERMITIAN:
1173   case MAT_NOT_SYMMETRY_ETERNAL:
1174     break;
1175   default:
1176     SETERRQ(PETSC_ERR_SUP,"unknown option");
1177   }
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatGetRow_MPIAIJ"
1183 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1184 {
1185   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1186   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1187   PetscErrorCode ierr;
1188   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1189   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1190   PetscInt       *cmap,*idx_p;
1191 
1192   PetscFunctionBegin;
1193   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1194   mat->getrowactive = PETSC_TRUE;
1195 
1196   if (!mat->rowvalues && (idx || v)) {
1197     /*
1198         allocate enough space to hold information from the longest row.
1199     */
1200     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1201     PetscInt     max = 1,tmp;
1202     for (i=0; i<matin->m; i++) {
1203       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1204       if (max < tmp) { max = tmp; }
1205     }
1206     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1207     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1208   }
1209 
1210   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1211   lrow = row - rstart;
1212 
1213   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1214   if (!v)   {pvA = 0; pvB = 0;}
1215   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1216   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1217   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1218   nztot = nzA + nzB;
1219 
1220   cmap  = mat->garray;
1221   if (v  || idx) {
1222     if (nztot) {
1223       /* Sort by increasing column numbers, assuming A and B already sorted */
1224       PetscInt imark = -1;
1225       if (v) {
1226         *v = v_p = mat->rowvalues;
1227         for (i=0; i<nzB; i++) {
1228           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1229           else break;
1230         }
1231         imark = i;
1232         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1233         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1234       }
1235       if (idx) {
1236         *idx = idx_p = mat->rowindices;
1237         if (imark > -1) {
1238           for (i=0; i<imark; i++) {
1239             idx_p[i] = cmap[cworkB[i]];
1240           }
1241         } else {
1242           for (i=0; i<nzB; i++) {
1243             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1244             else break;
1245           }
1246           imark = i;
1247         }
1248         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1249         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1250       }
1251     } else {
1252       if (idx) *idx = 0;
1253       if (v)   *v   = 0;
1254     }
1255   }
1256   *nz = nztot;
1257   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1258   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1264 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1265 {
1266   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1267 
1268   PetscFunctionBegin;
1269   if (!aij->getrowactive) {
1270     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1271   }
1272   aij->getrowactive = PETSC_FALSE;
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatNorm_MPIAIJ"
1278 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1279 {
1280   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1281   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1282   PetscErrorCode ierr;
1283   PetscInt       i,j,cstart = aij->cstart;
1284   PetscReal      sum = 0.0;
1285   PetscScalar    *v;
1286 
1287   PetscFunctionBegin;
1288   if (aij->size == 1) {
1289     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1290   } else {
1291     if (type == NORM_FROBENIUS) {
1292       v = amat->a;
1293       for (i=0; i<amat->nz; i++) {
1294 #if defined(PETSC_USE_COMPLEX)
1295         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296 #else
1297         sum += (*v)*(*v); v++;
1298 #endif
1299       }
1300       v = bmat->a;
1301       for (i=0; i<bmat->nz; i++) {
1302 #if defined(PETSC_USE_COMPLEX)
1303         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1304 #else
1305         sum += (*v)*(*v); v++;
1306 #endif
1307       }
1308       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1309       *norm = sqrt(*norm);
1310     } else if (type == NORM_1) { /* max column norm */
1311       PetscReal *tmp,*tmp2;
1312       PetscInt    *jj,*garray = aij->garray;
1313       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1314       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1315       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1316       *norm = 0.0;
1317       v = amat->a; jj = amat->j;
1318       for (j=0; j<amat->nz; j++) {
1319         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1320       }
1321       v = bmat->a; jj = bmat->j;
1322       for (j=0; j<bmat->nz; j++) {
1323         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1324       }
1325       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1326       for (j=0; j<mat->N; j++) {
1327         if (tmp2[j] > *norm) *norm = tmp2[j];
1328       }
1329       ierr = PetscFree(tmp);CHKERRQ(ierr);
1330       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1331     } else if (type == NORM_INFINITY) { /* max row norm */
1332       PetscReal ntemp = 0.0;
1333       for (j=0; j<aij->A->m; j++) {
1334         v = amat->a + amat->i[j];
1335         sum = 0.0;
1336         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1337           sum += PetscAbsScalar(*v); v++;
1338         }
1339         v = bmat->a + bmat->i[j];
1340         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1341           sum += PetscAbsScalar(*v); v++;
1342         }
1343         if (sum > ntemp) ntemp = sum;
1344       }
1345       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1346     } else {
1347       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1348     }
1349   }
1350   PetscFunctionReturn(0);
1351 }
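/*
   The norms computed above are the standard ones:

      NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 ), accumulated over the diagonal and
                       off-diagonal blocks and reduced with MPI_SUM
      NORM_1:          max_j sum_i |a_ij|  (largest column sum; per-column sums
                       are combined with MPI_SUM, then the maximum is taken)
      NORM_INFINITY:   max_i sum_j |a_ij|  (largest row sum; rows are entirely
                       local, so only an MPI_MAX reduction is needed)
*/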
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatTranspose_MPIAIJ"
1355 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1356 {
1357   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1358   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1361   Mat            B;
1362   PetscScalar    *array;
1363 
1364   PetscFunctionBegin;
1365   if (!matout && M != N) {
1366     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1367   }
1368 
1369   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1370   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1371   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1372 
1373   /* copy over the A part */
1374   Aloc = (Mat_SeqAIJ*)a->A->data;
1375   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1376   row = a->rstart;
1377   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1378   for (i=0; i<m; i++) {
1379     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1380     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1381   }
1382   aj = Aloc->j;
1383   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1384 
1385   /* copy over the B part */
1386   Aloc = (Mat_SeqAIJ*)a->B->data;
1387   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1388   row  = a->rstart;
1389   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1390   ct   = cols;
1391   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1392   for (i=0; i<m; i++) {
1393     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1394     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1395   }
1396   ierr = PetscFree(ct);CHKERRQ(ierr);
1397   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1398   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1399   if (matout) {
1400     *matout = B;
1401   } else {
1402     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1409 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1410 {
1411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1412   Mat            a = aij->A,b = aij->B;
1413   PetscErrorCode ierr;
1414   PetscInt       s1,s2,s3;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1418   if (rr) {
1419     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1420     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1421     /* Overlap communication with computation. */
1422     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1423   }
1424   if (ll) {
1425     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1426     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1427     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1428   }
1429   /* scale  the diagonal block */
1430   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1431 
1432   if (rr) {
1433     /* Do a scatter end and then right scale the off-diagonal block */
1434     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1435     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1436   }
1437 
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1444 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1445 {
1446   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1447   PetscErrorCode ierr;
1448 
1449   PetscFunctionBegin;
1450   if (!a->rank) {
1451     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1458 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1459 {
1460   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1461   PetscErrorCode ierr;
1462 
1463   PetscFunctionBegin;
1464   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1465   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1466   PetscFunctionReturn(0);
1467 }
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1470 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1471 {
1472   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatEqual_MPIAIJ"
1482 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1483 {
1484   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1485   Mat            a,b,c,d;
1486   PetscTruth     flg;
1487   PetscErrorCode ierr;
1488 
1489   PetscFunctionBegin;
1490   a = matA->A; b = matA->B;
1491   c = matB->A; d = matB->B;
1492 
1493   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1494   if (flg) {
1495     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1496   }
1497   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatCopy_MPIAIJ"
1503 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1504 {
1505   PetscErrorCode ierr;
1506   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1507   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1508 
1509   PetscFunctionBegin;
1510   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1511   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1512     /* because of the column compression in the off-processor part of the matrix a->B,
1513        the number of columns in a->B and b->B may be different, hence we cannot call
1514        the MatCopy() directly on the two parts. If need be, we can provide a more
1515        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1516        then copying the submatrices */
1517     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1518   } else {
1519     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1520     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1521   }
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1527 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1528 {
1529   PetscErrorCode ierr;
1530 
1531   PetscFunctionBegin;
1532   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #include "petscblaslapack.h"
1537 #undef __FUNCT__
1538 #define __FUNCT__ "MatAXPY_MPIAIJ"
1539 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1540 {
1541   PetscErrorCode ierr;
1542   PetscInt       i;
1543   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1544   PetscBLASInt   bnz,one=1;
1545   Mat_SeqAIJ     *x,*y;
1546 
1547   PetscFunctionBegin;
1548   if (str == SAME_NONZERO_PATTERN) {
1549     x = (Mat_SeqAIJ *)xx->A->data;
1550     y = (Mat_SeqAIJ *)yy->A->data;
1551     bnz = (PetscBLASInt)x->nz;
1552     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1553     x = (Mat_SeqAIJ *)xx->B->data;
1554     y = (Mat_SeqAIJ *)yy->B->data;
1555     bnz = (PetscBLASInt)x->nz;
1556     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1557   } else if (str == SUBSET_NONZERO_PATTERN) {
1558     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1559 
1560     x = (Mat_SeqAIJ *)xx->B->data;
1561     y = (Mat_SeqAIJ *)yy->B->data;
1562     if (y->xtoy && y->XtoY != xx->B) {
1563       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1564       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1565     }
1566     if (!y->xtoy) { /* get xtoy */
1567       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1568       y->XtoY = xx->B;
1569     }
1570     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1571   } else {
1572     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1573   }
1574   PetscFunctionReturn(0);
1575 }
1576 
1577 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatConjugate_MPIAIJ"
1581 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1582 {
1583 #if defined(PETSC_USE_COMPLEX)
1584   PetscErrorCode ierr;
1585   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1586 
1587   PetscFunctionBegin;
1588   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1589   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1590 #else
1591   PetscFunctionBegin;
1592 #endif
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 /* -------------------------------------------------------------------*/
1597 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1598        MatGetRow_MPIAIJ,
1599        MatRestoreRow_MPIAIJ,
1600        MatMult_MPIAIJ,
1601 /* 4*/ MatMultAdd_MPIAIJ,
1602        MatMultTranspose_MPIAIJ,
1603        MatMultTransposeAdd_MPIAIJ,
1604        0,
1605        0,
1606        0,
1607 /*10*/ 0,
1608        0,
1609        0,
1610        MatRelax_MPIAIJ,
1611        MatTranspose_MPIAIJ,
1612 /*15*/ MatGetInfo_MPIAIJ,
1613        MatEqual_MPIAIJ,
1614        MatGetDiagonal_MPIAIJ,
1615        MatDiagonalScale_MPIAIJ,
1616        MatNorm_MPIAIJ,
1617 /*20*/ MatAssemblyBegin_MPIAIJ,
1618        MatAssemblyEnd_MPIAIJ,
1619        0,
1620        MatSetOption_MPIAIJ,
1621        MatZeroEntries_MPIAIJ,
1622 /*25*/ MatZeroRows_MPIAIJ,
1623        0,
1624        0,
1625        0,
1626        0,
1627 /*30*/ MatSetUpPreallocation_MPIAIJ,
1628        0,
1629        0,
1630        0,
1631        0,
1632 /*35*/ MatDuplicate_MPIAIJ,
1633        0,
1634        0,
1635        0,
1636        0,
1637 /*40*/ MatAXPY_MPIAIJ,
1638        MatGetSubMatrices_MPIAIJ,
1639        MatIncreaseOverlap_MPIAIJ,
1640        MatGetValues_MPIAIJ,
1641        MatCopy_MPIAIJ,
1642 /*45*/ MatPrintHelp_MPIAIJ,
1643        MatScale_MPIAIJ,
1644        0,
1645        0,
1646        0,
1647 /*50*/ MatSetBlockSize_MPIAIJ,
1648        0,
1649        0,
1650        0,
1651        0,
1652 /*55*/ MatFDColoringCreate_MPIAIJ,
1653        0,
1654        MatSetUnfactored_MPIAIJ,
1655        0,
1656        0,
1657 /*60*/ MatGetSubMatrix_MPIAIJ,
1658        MatDestroy_MPIAIJ,
1659        MatView_MPIAIJ,
1660        MatGetPetscMaps_Petsc,
1661        0,
1662 /*65*/ 0,
1663        0,
1664        0,
1665        0,
1666        0,
1667 /*70*/ 0,
1668        0,
1669        MatSetColoring_MPIAIJ,
1670 #if defined(PETSC_HAVE_ADIC)
1671        MatSetValuesAdic_MPIAIJ,
1672 #else
1673        0,
1674 #endif
1675        MatSetValuesAdifor_MPIAIJ,
1676 /*75*/ 0,
1677        0,
1678        0,
1679        0,
1680        0,
1681 /*80*/ 0,
1682        0,
1683        0,
1684        0,
1685 /*84*/ MatLoad_MPIAIJ,
1686        0,
1687        0,
1688        0,
1689        0,
1690        0,
1691 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1692        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1693        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1694        MatPtAP_Basic,
1695        MatPtAPSymbolic_MPIAIJ,
1696 /*95*/ MatPtAPNumeric_MPIAIJ,
1697        0,
1698        0,
1699        0,
1700        0,
1701 /*100*/0,
1702        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1703        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1704        MatConjugate_MPIAIJ
1705 };
1706 
1707 /* ----------------------------------------------------------------------------------------*/
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1712 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1713 {
1714   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1719   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 EXTERN_C_BEGIN
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1727 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1728 {
1729   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1730   PetscErrorCode ierr;
1731 
1732   PetscFunctionBegin;
1733   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1734   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 #include "petscpc.h"
1740 EXTERN_C_BEGIN
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1743 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1744 {
1745   Mat_MPIAIJ     *b;
1746   PetscErrorCode ierr;
1747   PetscInt       i;
1748 
1749   PetscFunctionBegin;
1750   B->preallocated = PETSC_TRUE;
1751   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1752   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1753   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1754   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1755   if (d_nnz) {
1756     for (i=0; i<B->m; i++) {
1757       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1758     }
1759   }
1760   if (o_nnz) {
1761     for (i=0; i<B->m; i++) {
1762       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1763     }
1764   }
1765   b = (Mat_MPIAIJ*)B->data;
1766   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1767   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1768 
1769   PetscFunctionReturn(0);
1770 }
1771 EXTERN_C_END
1772 
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1775 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1776 {
1777   Mat            mat;
1778   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1779   PetscErrorCode ierr;
1780 
1781   PetscFunctionBegin;
1782   *newmat       = 0;
1783   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1784   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1785   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1786   a    = (Mat_MPIAIJ*)mat->data;
1787 
1788   mat->factor       = matin->factor;
1789   mat->bs           = matin->bs;
1790   mat->assembled    = PETSC_TRUE;
1791   mat->insertmode   = NOT_SET_VALUES;
1792   mat->preallocated = PETSC_TRUE;
1793 
1794   a->rstart       = oldmat->rstart;
1795   a->rend         = oldmat->rend;
1796   a->cstart       = oldmat->cstart;
1797   a->cend         = oldmat->cend;
1798   a->size         = oldmat->size;
1799   a->rank         = oldmat->rank;
1800   a->donotstash   = oldmat->donotstash;
1801   a->roworiented  = oldmat->roworiented;
1802   a->rowindices   = 0;
1803   a->rowvalues    = 0;
1804   a->getrowactive = PETSC_FALSE;
1805 
1806   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1807   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1808   if (oldmat->colmap) {
1809 #if defined (PETSC_USE_CTABLE)
1810     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1811 #else
1812     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1813     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1814     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1815 #endif
1816   } else a->colmap = 0;
1817   if (oldmat->garray) {
1818     PetscInt len;
1819     len  = oldmat->B->n;
1820     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1821     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1822     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1823   } else a->garray = 0;
1824 
1825   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1826   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1827   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1828   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1829   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1830   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1831   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1832   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1833   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1834   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1835   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1836   *newmat = mat;
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #include "petscsys.h"
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatLoad_MPIAIJ"
1844 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
1845 {
1846   Mat            A;
1847   PetscScalar    *vals,*svals;
1848   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1849   MPI_Status     status;
1850   PetscErrorCode ierr;
1851   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1852   PetscInt       i,nz,j,rstart,rend,mmax;
1853   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1854   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1855   PetscInt       cend,cstart,n,*rowners;
1856   int            fd;
1857 
1858   PetscFunctionBegin;
1859   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1860   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1861   if (!rank) {
1862     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1863     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1864     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1865   }
1866 
1867   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1868   M = header[1]; N = header[2];
1869   /* determine ownership of all rows */
1870   m    = M/size + ((M % size) > rank);
1871   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1872   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1873 
1874   /* First process needs enough room for process with most rows */
1875   if (!rank) {
1876     mmax       = rowners[1];
1877     for (i=2; i<size; i++) {
1878       mmax = PetscMax(mmax,rowners[i]);
1879     }
1880   } else mmax = m;
1881 
1882   rowners[0] = 0;
1883   for (i=2; i<=size; i++) {
1884     mmax       = PetscMax(mmax,rowners[i]);
1885     rowners[i] += rowners[i-1];
1886   }
1887   rstart = rowners[rank];
1888   rend   = rowners[rank+1];
1889 
1890   /* distribute row lengths to all processors */
1891   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1892   if (!rank) {
1893     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1894     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1895     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1896     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1897     for (j=0; j<m; j++) {
1898       procsnz[0] += ourlens[j];
1899     }
1900     for (i=1; i<size; i++) {
1901       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1902       /* calculate the number of nonzeros on each processor */
1903       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1904         procsnz[i] += rowlengths[j];
1905       }
1906       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1907     }
1908     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1909   } else {
1910     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1911   }
1912 
1913   if (!rank) {
1914     /* determine max buffer needed and allocate it */
1915     maxnz = 0;
1916     for (i=0; i<size; i++) {
1917       maxnz = PetscMax(maxnz,procsnz[i]);
1918     }
1919     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1920 
1921     /* read in my part of the matrix column indices  */
1922     nz   = procsnz[0];
1923     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1924     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1925 
1926     /* read in everyone else's rows and ship them off */
1927     for (i=1; i<size; i++) {
1928       nz   = procsnz[i];
1929       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1930       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1931     }
1932     ierr = PetscFree(cols);CHKERRQ(ierr);
1933   } else {
1934     /* determine buffer space needed for message */
1935     nz = 0;
1936     for (i=0; i<m; i++) {
1937       nz += ourlens[i];
1938     }
1939     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1940 
1941     /* receive message of column indices*/
1942     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1943     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1944     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1945   }
1946 
1947   /* determine column ownership if matrix is not square */
1948   if (N != M) {
1949     n      = N/size + ((N % size) > rank);
1950     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1951     cstart = cend - n;
1952   } else {
1953     cstart = rstart;
1954     cend   = rend;
1955     n      = cend - cstart;
1956   }
1957 
1958   /* loop over local rows, determining number of off diagonal entries */
1959   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1960   jj = 0;
1961   for (i=0; i<m; i++) {
1962     for (j=0; j<ourlens[i]; j++) {
1963       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1964       jj++;
1965     }
1966   }
1967 
1968   /* create our matrix */
1969   for (i=0; i<m; i++) {
1970     ourlens[i] -= offlens[i];
1971   }
1972   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
1973   ierr = MatSetType(A,type);CHKERRQ(ierr);
1974   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1975 
1976   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1977   for (i=0; i<m; i++) {
1978     ourlens[i] += offlens[i];
1979   }
1980 
1981   if (!rank) {
1982     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1983 
1984     /* read in my part of the matrix numerical values  */
1985     nz   = procsnz[0];
1986     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1987 
1988     /* insert into matrix */
1989     jj      = rstart;
1990     smycols = mycols;
1991     svals   = vals;
1992     for (i=0; i<m; i++) {
1993       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1994       smycols += ourlens[i];
1995       svals   += ourlens[i];
1996       jj++;
1997     }
1998 
1999     /* read in other processors and ship out */
2000     for (i=1; i<size; i++) {
2001       nz   = procsnz[i];
2002       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2003       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2004     }
2005     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2006   } else {
2007     /* receive numeric values */
2008     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2009 
2010     /* receive message of values*/
2011     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2012     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2013     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2014 
2015     /* insert into matrix */
2016     jj      = rstart;
2017     smycols = mycols;
2018     svals   = vals;
2019     for (i=0; i<m; i++) {
2020       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2021       smycols += ourlens[i];
2022       svals   += ourlens[i];
2023       jj++;
2024     }
2025   }
2026   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2027   ierr = PetscFree(vals);CHKERRQ(ierr);
2028   ierr = PetscFree(mycols);CHKERRQ(ierr);
2029   ierr = PetscFree(rowners);CHKERRQ(ierr);
2030 
2031   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2033   *newmat = A;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2039 /*
2040     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2041   locally, and then the end result by concatenating the local matrices.
2042   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2043 */
2044 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    rank,size;
2048   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2049   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2050   Mat            *local,M,Mreuse;
2051   PetscScalar    *vwork,*aa;
2052   MPI_Comm       comm = mat->comm;
2053   Mat_SeqAIJ     *aij;
2054 
2055 
2056   PetscFunctionBegin;
2057   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2058   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2059 
2060   if (call ==  MAT_REUSE_MATRIX) {
2061     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2062     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2063     local = &Mreuse;
2064     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2065   } else {
2066     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2067     Mreuse = *local;
2068     ierr   = PetscFree(local);CHKERRQ(ierr);
2069   }
2070 
2071   /*
2072       m - number of local rows
2073       n - number of columns (same on all processors)
2074       rstart - first row in new global matrix generated
2075   */
2076   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2077   if (call == MAT_INITIAL_MATRIX) {
2078     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2079     ii  = aij->i;
2080     jj  = aij->j;
2081 
2082     /*
2083         Determine the number of non-zeros in the diagonal and off-diagonal
2084         portions of the matrix in order to do correct preallocation
2085     */
2086 
2087     /* first get start and end of "diagonal" columns */
2088     if (csize == PETSC_DECIDE) {
2089       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2090       if (mglobal == n) { /* square matrix */
2091         nlocal = m;
2092       } else {
2093         nlocal = n/size + ((n % size) > rank);
2094       }
2095     } else {
2096       nlocal = csize;
2097     }
2098     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2099     rstart = rend - nlocal;
2100     if (rank == size - 1 && rend != n) {
2101       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2102     }
2103 
2104     /* next, compute all the lengths */
2105     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2106     olens = dlens + m;
2107     for (i=0; i<m; i++) {
2108       jend = ii[i+1] - ii[i];
2109       olen = 0;
2110       dlen = 0;
2111       for (j=0; j<jend; j++) {
2112         if (*jj < rstart || *jj >= rend) olen++;
2113         else dlen++;
2114         jj++;
2115       }
2116       olens[i] = olen;
2117       dlens[i] = dlen;
2118     }
2119     ierr = MatCreate(comm,m,nlocal,PETSC_DECIDE,n,&M);CHKERRQ(ierr);
2120     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2121     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2122     ierr = PetscFree(dlens);CHKERRQ(ierr);
2123   } else {
2124     PetscInt ml,nl;
2125 
2126     M = *newmat;
2127     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2128     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2129     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2130     /*
2131          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2132        rather than the slower MatSetValues().
2133     */
2134     M->was_assembled = PETSC_TRUE;
2135     M->assembled     = PETSC_FALSE;
2136   }
2137   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2138   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2139   ii  = aij->i;
2140   jj  = aij->j;
2141   aa  = aij->a;
2142   for (i=0; i<m; i++) {
2143     row   = rstart + i;
2144     nz    = ii[i+1] - ii[i];
2145     cwork = jj;     jj += nz;
2146     vwork = aa;     aa += nz;
2147     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2148   }
2149 
2150   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2151   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2152   *newmat = M;
2153 
2154   /* save submatrix used in processor for next request */
2155   if (call ==  MAT_INITIAL_MATRIX) {
2156     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2157     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2158   }
2159 
2160   PetscFunctionReturn(0);
2161 }
2162 
2163 EXTERN_C_BEGIN
2164 #undef __FUNCT__
2165 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2166 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2167 {
2168   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2169   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2170   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2171   const PetscInt *JJ;
2172   PetscScalar    *values;
2173   PetscErrorCode ierr;
2174 
2175   PetscFunctionBegin;
2176 #if defined(PETSC_OPT_g)
2177   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2178 #endif
2179   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2180   o_nnz = d_nnz + m;
2181 
2182   for (i=0; i<m; i++) {
2183     nnz     = I[i+1]- I[i];
2184     JJ      = J + I[i];
2185     nnz_max = PetscMax(nnz_max,nnz);
2186 #if defined(PETSC_OPT_g)
2187     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2188 #endif
2189     for (j=0; j<nnz; j++) {
2190       if (*JJ >= cstart) break;
2191       JJ++;
2192     }
2193     d = 0;
2194     for (; j<nnz; j++) {
2195       if (*JJ++ >= cend) break;
2196       d++;
2197     }
2198     d_nnz[i] = d;
2199     o_nnz[i] = nnz - d;
2200   }
2201   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2202   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2203 
2204   if (v) values = (PetscScalar*)v;
2205   else {
2206     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2207     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2208   }
2209 
2210   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2211   for (i=0; i<m; i++) {
2212     ii   = i + rstart;
2213     nnz  = I[i+1]- I[i];
2214     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2215   }
2216   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2217   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2218   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2219 
2220   if (!v) {
2221     ierr = PetscFree(values);CHKERRQ(ierr);
2222   }
2223   PetscFunctionReturn(0);
2224 }
2225 EXTERN_C_END
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2229 /*@C
2230    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2231    (the default parallel PETSc format).
2232 
2233    Collective on MPI_Comm
2234 
2235    Input Parameters:
2236 +  A - the matrix
2237 .  i - the indices into j for the start of each local row (starts with zero)
2238 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2239 -  v - optional values in the matrix
2240 
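   A minimal usage sketch (the CSR data below is illustrative and assumes a matrix
   with 2 local rows and 8 global columns; it is not taken from this file):
.vb
      PetscInt    i[] = {0,2,5};               /* row starts for the 2 local rows */
      PetscInt    j[] = {0,3,1,4,7};           /* global column indices, sorted within each row */
      PetscScalar v[] = {1.0,2.0,3.0,4.0,5.0}; /* optional numerical values */

      ierr = MatCreate(comm,2,PETSC_DECIDE,PETSC_DETERMINE,8,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocationCSR(A,i,j,v);CHKERRQ(ierr);
.ve
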
2241    Level: developer
2242 
2243 .keywords: matrix, aij, compressed row, sparse, parallel
2244 
2245 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2246 @*/
2247 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2248 {
2249   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2250 
2251   PetscFunctionBegin;
2252   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2253   if (f) {
2254     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2255   }
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2261 /*@C
2262    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2263    (the default parallel PETSc format).  For good matrix assembly performance
2264    the user should preallocate the matrix storage by setting the parameters
2265    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2266    performance can be increased by more than a factor of 50.
2267 
2268    Collective on MPI_Comm
2269 
2270    Input Parameters:
2271 +  A - the matrix
2272 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2273            (same value is used for all local rows)
2274 .  d_nnz - array containing the number of nonzeros in the various rows of the
2275            DIAGONAL portion of the local submatrix (possibly different for each row)
2276            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2277            The size of this array is equal to the number of local rows, i.e 'm'.
2278            You must leave room for the diagonal entry even if it is zero.
2279 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2280            submatrix (same value is used for all local rows).
2281 -  o_nnz - array containing the number of nonzeros in the various rows of the
2282            OFF-DIAGONAL portion of the local submatrix (possibly different for
2283            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2284            structure. The size of this array is equal to the number
2285            of local rows, i.e 'm'.
2286 
2287    If the *_nnz parameter is given then the *_nz parameter is ignored
2288 
2289    The AIJ format (also called the Yale sparse matrix format or
2290    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2291    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2292 
2293    The parallel matrix is partitioned such that the first m0 rows belong to
2294    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2295    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2296 
2297    The DIAGONAL portion of the local submatrix of a processor can be defined
2298    as the submatrix which is obtained by extracting the part corresponding
2299    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2300    first row that belongs to the processor, and r2 is the last row belonging
2301    to this processor. This is a square mxm matrix. The remaining portion
2302    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2303 
2304    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2305 
2306    Example usage:
2307 
2308    Consider the following 8x8 matrix with 34 non-zero values, that is
2309    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2310    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2311    as follows:
2312 
2313 .vb
2314             1  2  0  |  0  3  0  |  0  4
2315     Proc0   0  5  6  |  7  0  0  |  8  0
2316             9  0 10  | 11  0  0  | 12  0
2317     -------------------------------------
2318            13  0 14  | 15 16 17  |  0  0
2319     Proc1   0 18  0  | 19 20 21  |  0  0
2320             0  0  0  | 22 23  0  | 24  0
2321     -------------------------------------
2322     Proc2  25 26 27  |  0  0 28  | 29  0
2323            30  0  0  | 31 32 33  |  0 34
2324 .ve
2325 
2326    This can be represented as a collection of submatrices as:
2327 
2328 .vb
2329       A B C
2330       D E F
2331       G H I
2332 .ve
2333 
2334    Where the submatrices A,B,C are owned by proc0, D,E,F are
2335    owned by proc1, G,H,I are owned by proc2.
2336 
2337    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2338    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2339    The 'M','N' parameters are 8,8, and have the same values on all procs.
2340 
2341    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2342    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2343    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2344    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2345    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2346    matrix, and [DF] as another SeqAIJ matrix.
2347 
2348    When d_nz, o_nz parameters are specified, d_nz storage elements are
2349    allocated for every row of the local diagonal submatrix, and o_nz
2350    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2351    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2352    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2353    In this case, the values of d_nz,o_nz are:
2354 .vb
2355      proc0 : dnz = 2, o_nz = 2
2356      proc1 : dnz = 3, o_nz = 2
2357      proc2 : dnz = 1, o_nz = 4
2358 .ve
2359    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2360    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2361    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2362    34 values.
2363 
2364    When d_nnz, o_nnz parameters are specified, the storage is specified
2365    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2366    In the above case the values for d_nnz,o_nnz are:
2367 .vb
2368      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2369      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2370      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2371 .ve
2372    Here the space allocated is the sum of all the above values, i.e. 34, and
2373    hence the preallocation is perfect.
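
   As a minimal sketch (not code taken from this file), proc0 of the example above
   could preallocate its 3 local rows with the per-row arrays, assuming A has already
   been created as an MPIAIJ matrix with MatCreate() and MatSetType():
.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   /* per-row counts for proc0 */

      ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
      /* or, with a single worst-case count for every local row */
      ierr = MatMPIAIJSetPreallocation(A,2,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve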
2374 
2375    Level: intermediate
2376 
2377 .keywords: matrix, aij, compressed row, sparse, parallel
2378 
2379 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2380           MPIAIJ
2381 @*/
2382 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2383 {
2384   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2385 
2386   PetscFunctionBegin;
2387   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2388   if (f) {
2389     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2390   }
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatCreateMPIAIJ"
2396 /*@C
2397    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2398    (the default parallel PETSc format).  For good matrix assembly performance
2399    the user should preallocate the matrix storage by setting the parameters
2400    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2401    performance can be increased by more than a factor of 50.
2402 
2403    Collective on MPI_Comm
2404 
2405    Input Parameters:
2406 +  comm - MPI communicator
2407 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2408            This value should be the same as the local size used in creating the
2409            y vector for the matrix-vector product y = Ax.
2410 .  n - This value should be the same as the local size used in creating the
2411        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2412        calculated if N is given) For square matrices n is almost always m.
2413 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2414 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2415 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2416            (same value is used for all local rows)
2417 .  d_nnz - array containing the number of nonzeros in the various rows of the
2418            DIAGONAL portion of the local submatrix (possibly different for each row)
2419            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2420            The size of this array is equal to the number of local rows, i.e 'm'.
2421            You must leave room for the diagonal entry even if it is zero.
2422 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2423            submatrix (same value is used for all local rows).
2424 -  o_nnz - array containing the number of nonzeros in the various rows of the
2425            OFF-DIAGONAL portion of the local submatrix (possibly different for
2426            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2427            structure. The size of this array is equal to the number
2428            of local rows, i.e 'm'.
2429 
2430    Output Parameter:
2431 .  A - the matrix
2432 
2433    Notes:
2434    If the *_nnz parameter is given then the *_nz parameter is ignored
2435 
2436    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2437    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2438    storage requirements for this matrix.
2439 
2440    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2441    processor then it must be used on all processors that share the object for
2442    that argument.
2443 
2444    The user MUST specify either the local or global matrix dimensions
2445    (possibly both).
2446 
2447    The parallel matrix is partitioned such that the first m0 rows belong to
2448    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2449    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2450 
2451    The DIAGONAL portion of the local submatrix of a processor can be defined
2452    as the submatrix which is obtained by extracting the part corresponding
2453    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2454    first row that belongs to the processor, and r2 is the last row belonging
2455    to this processor. This is a square mxm matrix. The remaining portion
2456    of the local submatrix (mxN) constitute the OFF-DIAGONAL portion.
2457 
2458    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2459 
2460    When calling this routine with a single process communicator, a matrix of
2461    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2462    type of communicator, use the construction mechanism:
2463      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2464 
2465    By default, this format uses inodes (identical nodes) when possible.
2466    We search for consecutive rows with the same nonzero structure, thereby
2467    reusing matrix information to achieve increased efficiency.
2468 
2469    Options Database Keys:
2470 +  -mat_no_inode  - Do not use inodes
2471 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2472 -  -mat_aij_oneindex - Internally use indexing starting at 1
2473         rather than 0.  Note that when calling MatSetValues(),
2474         the user still MUST index entries starting at 0!
2475 
2476 
2477    Example usage:
2478 
2479    Consider the following 8x8 matrix with 34 non-zero values, that is
2480    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2481    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2482    as follows:
2483 
2484 .vb
2485             1  2  0  |  0  3  0  |  0  4
2486     Proc0   0  5  6  |  7  0  0  |  8  0
2487             9  0 10  | 11  0  0  | 12  0
2488     -------------------------------------
2489            13  0 14  | 15 16 17  |  0  0
2490     Proc1   0 18  0  | 19 20 21  |  0  0
2491             0  0  0  | 22 23  0  | 24  0
2492     -------------------------------------
2493     Proc2  25 26 27  |  0  0 28  | 29  0
2494            30  0  0  | 31 32 33  |  0 34
2495 .ve
2496 
2497    This can be represented as a collection of submatrices as:
2498 
2499 .vb
2500       A B C
2501       D E F
2502       G H I
2503 .ve
2504 
2505    Where the submatrices A,B,C are owned by proc0, D,E,F are
2506    owned by proc1, G,H,I are owned by proc2.
2507 
2508    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2509    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2510    The 'M','N' parameters are 8,8, and have the same values on all procs.
2511 
2512    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2513    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2514    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2515    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2516    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2517    matrix, and [DF] as another SeqAIJ matrix.
2518 
2519    When d_nz, o_nz parameters are specified, d_nz storage elements are
2520    allocated for every row of the local diagonal submatrix, and o_nz
2521    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2522    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2523    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2524    In this case, the values of d_nz,o_nz are:
2525 .vb
2526      proc0 : dnz = 2, o_nz = 2
2527      proc1 : dnz = 3, o_nz = 2
2528      proc2 : dnz = 1, o_nz = 4
2529 .ve
2530    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2531    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2532    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2533    34 values.
2534 
2535    When d_nnz, o_nnz parameters are specified, the storage is specified
2536    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2537    In the above case the values for d_nnz,o_nnz are:
2538 .vb
2539      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2540      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2541      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2542 .ve
2543    Here the space allocated is the sum of all the above values, i.e. 34, and
2544    hence the preallocation is perfect.
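
   As a minimal sketch (not code taken from this file), proc0 of the example above
   could create its share of the 8x8 matrix with the per-row counts listed above;
   the other processes would pass their own m, n, d_nnz and o_nnz values:
.vb
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};

      ierr = MatCreateMPIAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve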
2545 
2546    Level: intermediate
2547 
2548 .keywords: matrix, aij, compressed row, sparse, parallel
2549 
2550 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2551           MPIAIJ
2552 @*/
2553 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2554 {
2555   PetscErrorCode ierr;
2556   PetscMPIInt    size;
2557 
2558   PetscFunctionBegin;
2559   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2560   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2561   if (size > 1) {
2562     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2563     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2564   } else {
2565     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2566     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2567   }
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2573 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2574 {
2575   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2576 
2577   PetscFunctionBegin;
2578   *Ad     = a->A;
2579   *Ao     = a->B;
2580   *colmap = a->garray;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2586 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2587 {
2588   PetscErrorCode ierr;
2589   PetscInt       i;
2590   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2591 
2592   PetscFunctionBegin;
2593   if (coloring->ctype == IS_COLORING_LOCAL) {
2594     ISColoringValue *allcolors,*colors;
2595     ISColoring      ocoloring;
2596 
2597     /* set coloring for diagonal portion */
2598     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2599 
2600     /* set coloring for off-diagonal portion */
2601     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2602     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2603     for (i=0; i<a->B->n; i++) {
2604       colors[i] = allcolors[a->garray[i]];
2605     }
2606     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2607     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2608     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2609     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2610   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2611     ISColoringValue *colors;
2612     PetscInt             *larray;
2613     ISColoring      ocoloring;
2614 
2615     /* set coloring for diagonal portion */
2616     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2617     for (i=0; i<a->A->n; i++) {
2618       larray[i] = i + a->cstart;
2619     }
2620     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2621     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2622     for (i=0; i<a->A->n; i++) {
2623       colors[i] = coloring->colors[larray[i]];
2624     }
2625     ierr = PetscFree(larray);CHKERRQ(ierr);
2626     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2627     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2628     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2629 
2630     /* set coloring for off-diagonal portion */
2631     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2632     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2633     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2634     for (i=0; i<a->B->n; i++) {
2635       colors[i] = coloring->colors[larray[i]];
2636     }
2637     ierr = PetscFree(larray);CHKERRQ(ierr);
2638     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2639     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2640     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2641   } else {
2642     SETERRQ1(PETSC_ERR_SUP,"No support for ISColoringType %d",(int)coloring->ctype);
2643   }
2644 
2645   PetscFunctionReturn(0);
2646 }
2647 
2648 #if defined(PETSC_HAVE_ADIC)
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2651 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2652 {
2653   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2658   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2659   PetscFunctionReturn(0);
2660 }
2661 #endif
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2665 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2666 {
2667   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2668   PetscErrorCode ierr;
2669 
2670   PetscFunctionBegin;
2671   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2672   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2673   PetscFunctionReturn(0);
2674 }
2675 
2676 #undef __FUNCT__
2677 #define __FUNCT__ "MatMerge"
2678 /*@C
2679       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2680                  matrices from each processor
2681 
2682     Collective on MPI_Comm
2683 
2684    Input Parameters:
2685 +    comm - the communicator the parallel matrix will live on
2686 .    inmat - the input sequential matrix on this process
2687 .    n - number of local columns (or PETSC_DECIDE)
2688 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2689 
2690    Output Parameter:
2691 .    outmat - the parallel matrix generated
2692 
2693     Level: advanced
2694 
2695    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2696 
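   A minimal usage sketch (hypothetical; each process is assumed to have already
   assembled its own SeqAIJ matrix inmat, all with the same number of columns).
   Note that the input matrix is destroyed by this routine:
.vb
      Mat inmat,outmat;
      /* ... assemble inmat on every process ... */
      ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&outmat);CHKERRQ(ierr);
.ve
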
2697 @*/
2698 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2699 {
2700   PetscErrorCode ierr;
2701   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2702   PetscInt       *indx;
2703   PetscScalar    *values;
2704   PetscMap       columnmap,rowmap;
2705 
2706   PetscFunctionBegin;
2707   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2708   /*
2709   PetscMPIInt       rank;
2710   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2711   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2712   */
2713   if (scall == MAT_INITIAL_MATRIX){
2714     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2715     if (n == PETSC_DECIDE){
2716       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2717       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2718       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2719       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2720       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2721     }
2722 
2723     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2724     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2725     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2726     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2727     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2728 
2729     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2730     for (i=0;i<m;i++) {
2731       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2732       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2733       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2734     }
2735     /* This routine will ONLY return MPIAIJ type matrix */
2736     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,outmat);CHKERRQ(ierr);
2737     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2738     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2739     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2740 
2741   } else if (scall == MAT_REUSE_MATRIX){
2742     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2743   } else {
2744     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2745   }
2746 
2747   for (i=0;i<m;i++) {
2748     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2749     I    = i + rstart;
2750     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2751     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2752   }
2753   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2754   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756 
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatFileSplit"
2762 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2763 {
2764   PetscErrorCode    ierr;
2765   PetscMPIInt       rank;
2766   PetscInt          m,N,i,rstart,nnz;
2767   size_t            len;
2768   const PetscInt    *indx;
2769   PetscViewer       out;
2770   char              *name;
2771   Mat               B;
2772   const PetscScalar *values;
2773 
2774   PetscFunctionBegin;
2775   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2776   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2777   /* Should this be the type of the diagonal block of A? */
2778   ierr = MatCreate(PETSC_COMM_SELF,m,N,m,N,&B);CHKERRQ(ierr);
2779   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2780   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2781   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2782   for (i=0;i<m;i++) {
2783     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2784     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2785     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2786   }
2787   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2788   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789 
2790   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2791   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2792   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix and terminating null */
2793   sprintf(name,"%s.%d",outfile,rank);
2794   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2795   ierr = PetscFree(name);CHKERRQ(ierr);
2796   ierr = MatView(B,out);CHKERRQ(ierr);
2797   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2798   ierr = MatDestroy(B);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2806 {
2807   PetscErrorCode       ierr;
2808   Mat_Merge_SeqsToMPI  *merge;
2809   PetscObjectContainer container;
2810 
2811   PetscFunctionBegin;
2812   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2813   if (container) {
2814     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2815     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2816     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2817     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2818     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2819     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2820     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2821     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2822     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2823     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2824     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2825     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2826 
2827     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2828     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2829     ierr = PetscFree(merge);CHKERRQ(ierr);
2830   }
2831 
2832   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #include "src/mat/utils/freespace.h"
2837 #include "petscbt.h"
2838 static PetscEvent logkey_seqstompinum = 0;
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2841 /*@C
2842       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2843                  matrices from each processor
2844 
2845     Collective on MPI_Comm
2846 
2847    Input Parameters:
2848 +    comm - the communicator the parallel matrix will live on
2849 .    seqmat - the input sequential matrix (one per processor)
2850 .    m - number of local rows (or PETSC_DECIDE)
2851 .    n - number of local columns (or PETSC_DECIDE)
2852 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2853 
2854    Output Parameter:
2855 .    mpimat - the parallel matrix generated
2856 
2857     Level: advanced
2858 
2859    Notes:
2860      The dimensions of the sequential matrix on each processor MUST be the same.
2861      The input seqmat is stored in the container "Mat_Merge_SeqsToMPI", and will be
2862      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2863 @*/
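/*
   A minimal usage sketch of the driver MatMerge_SeqsToMPI() documented above
   (assuming each process has already assembled a SeqAIJ matrix seqA of the full
   global dimensions; variable names are illustrative only):

     Mat C;
     ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
     ...                                use C                                ...
     ierr = MatDestroy(C);CHKERRQ(ierr);      (this also releases the merge data attached to C)
*/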
2864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2865 {
2866   PetscErrorCode       ierr;
2867   MPI_Comm             comm=mpimat->comm;
2868   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2869   PetscMPIInt          size,rank,taga,*len_s;
2870   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2871   PetscInt             proc,m;
2872   PetscInt             **buf_ri,**buf_rj;
2873   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2874   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2875   MPI_Request          *s_waits,*r_waits;
2876   MPI_Status           *status;
2877   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2878   Mat_Merge_SeqsToMPI  *merge;
2879   PetscObjectContainer container;
2880 
2881   PetscFunctionBegin;
2882   if (!logkey_seqstompinum) {
2883     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2884   }
2885   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2886 
2887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2889 
2890   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2891   if (container) {
2892     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2893   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat was not created by MatMerge_SeqsToMPISymbolic()");
2894   bi     = merge->bi;
2895   bj     = merge->bj;
2896   buf_ri = merge->buf_ri;
2897   buf_rj = merge->buf_rj;
2898 
2899   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2900   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2901   len_s  = merge->len_s;
2902 
2903   /* send and recv matrix values */
2904   /*-----------------------------*/
2905   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2906   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2907 
2908   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2909   for (proc=0,k=0; proc<size; proc++){
2910     if (!len_s[proc]) continue;
2911     i = owners[proc];
2912     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2913     k++;
2914   }
2915 
2916   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2917   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2918   ierr = PetscFree(status);CHKERRQ(ierr);
2919 
2920   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2921   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2922 
2923   /* insert mat values of mpimat */
2924   /*----------------------------*/
2925   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2926   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2927   nextrow = buf_ri_k + merge->nrecv;
2928   nextai  = nextrow + merge->nrecv;
2929 
2930   for (k=0; k<merge->nrecv; k++){
2931     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2932     nrows = *(buf_ri_k[k]);
2933     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2934     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the start of the i-structure of the k-th received message */
2935   }
2936 
2937   /* set values of ba */
2938   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2939   for (i=0; i<m; i++) {
2940     arow = owners[rank] + i;
2941     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2942     bnzi = bi[i+1] - bi[i];
2943     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2944 
2945     /* add local non-zero vals of this proc's seqmat into ba */
2946     anzi = ai[arow+1] - ai[arow];
2947     aj   = a->j + ai[arow];
2948     aa   = a->a + ai[arow];
2949     nextaj = 0;
2950     for (j=0; nextaj<anzi; j++){
2951       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2952         ba_i[j] += aa[nextaj++];
2953       }
2954     }
2955 
2956     /* add received vals into ba */
2957     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2958       /* i-th row */
2959       if (i == *nextrow[k]) {
2960         anzi = *(nextai[k]+1) - *nextai[k];
2961         aj   = buf_rj[k] + *(nextai[k]);
2962         aa   = abuf_r[k] + *(nextai[k]);
2963         nextaj = 0;
2964         for (j=0; nextaj<anzi; j++){
2965           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2966             ba_i[j] += aa[nextaj++];
2967           }
2968         }
2969         nextrow[k]++; nextai[k]++;
2970       }
2971     }
2972     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2973   }
2974   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976 
2977   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2978   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2979   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2980   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 static PetscEvent logkey_seqstompisym = 0;
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
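/*
   MatMerge_SeqsToMPISymbolic - Symbolic phase of MatMerge_SeqsToMPI(): exchanges the
   ij-structure of the per-process sequential matrices, determines the nonzero pattern
   and preallocation of the parallel matrix, and attaches the communication data (the
   "MatMergeSeqsToMPI" container) to the result for reuse by MatMerge_SeqsToMPINumeric().
*/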
2987 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2988 {
2989   PetscErrorCode       ierr;
2990   Mat                  B_mpi;
2991   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2992   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2993   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2994   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2995   PetscInt             len,proc,*dnz,*onz;
2996   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2997   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2998   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
2999   MPI_Status           *status;
3000   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3001   PetscBT              lnkbt;
3002   Mat_Merge_SeqsToMPI  *merge;
3003   PetscObjectContainer container;
3004 
3005   PetscFunctionBegin;
3006   if (!logkey_seqstompisym) {
3007     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3008   }
3009   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3010 
3011   /* make sure it is a PETSc comm */
3012   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3013   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3014   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3015 
3016   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3017   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3018 
3019   /* determine row ownership */
3020   /*---------------------------------------------------------*/
3021   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3022   if (m == PETSC_DECIDE) {
3023     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3024   } else {
3025     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3026   }
3027   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3028   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3029   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3030 
3031   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3032   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3033 
3034   /* determine the number of messages to send, their lengths */
3035   /*---------------------------------------------------------*/
3036   len_s  = merge->len_s;
3037 
3038   len = 0;  /* length of buf_si[] */
3039   merge->nsend = 0;
3040   for (proc=0; proc<size; proc++){
3041     len_si[proc] = 0;
3042     if (proc == rank){
3043       len_s[proc] = 0;
3044     } else {
3045       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3046       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros (j entries) to be sent to [proc] */
3047     }
3048     if (len_s[proc]) {
3049       merge->nsend++;
3050       nrows = 0;
3051       for (i=owners[proc]; i<owners[proc+1]; i++){
3052         if (ai[i+1] > ai[i]) nrows++;
3053       }
3054       len_si[proc] = 2*(nrows+1);
3055       len += len_si[proc];
3056     }
3057   }
3058 
3059   /* determine the number and length of messages to receive for ij-structure */
3060   /*-------------------------------------------------------------------------*/
3061   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3062   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3063 
3064   /* post the Irecv of j-structure */
3065   /*-------------------------------*/
3066   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3067   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3068 
3069   /* post the Isend of j-structure */
3070   /*--------------------------------*/
3071   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3072   sj_waits = si_waits + merge->nsend;
3073 
3074   for (proc=0, k=0; proc<size; proc++){
3075     if (!len_s[proc]) continue;
3076     i = owners[proc];
3077     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3078     k++;
3079   }
3080 
3081   /* receives and sends of j-structure are complete */
3082   /*------------------------------------------------*/
3083   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3084   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3085 
3086   /* send and recv i-structure */
3087   /*---------------------------*/
3088   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3089   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3090 
3091   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3092   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3093   for (proc=0,k=0; proc<size; proc++){
3094     if (!len_s[proc]) continue;
3095     /* form outgoing message for i-structure:
3096          buf_si[0]:                 nrows to be sent
3097                [1:nrows]:           row index (local to the receiving [proc])
3098                [nrows+1:2*nrows+1]: i-structure index
3099     */
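    /* For example (hypothetical sizes): if only local rows 3 and 5 of [proc]'s block
       are nonempty, with 2 and 4 entries respectively, the outgoing message is
           buf_si = { 2, 3, 5, 0, 2, 6 }
       i.e. nrows = 2, local row indices {3,5}, i-structure {0,2,6}. */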
3100     /*-------------------------------------------*/
3101     nrows = len_si[proc]/2 - 1;
3102     buf_si_i    = buf_si + nrows+1;
3103     buf_si[0]   = nrows;
3104     buf_si_i[0] = 0;
3105     nrows = 0;
3106     for (i=owners[proc]; i<owners[proc+1]; i++){
3107       anzi = ai[i+1] - ai[i];
3108       if (anzi) {
3109         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3110         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3111         nrows++;
3112       }
3113     }
3114     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3115     k++;
3116     buf_si += len_si[proc];
3117   }
3118 
3119   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3120   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3121 
3122   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3123   for (i=0; i<merge->nrecv; i++){
3124     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3125   }
3126 
3127   ierr = PetscFree(len_si);CHKERRQ(ierr);
3128   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3129   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3130   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3131   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3132   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3133   ierr = PetscFree(status);CHKERRQ(ierr);
3134 
3135   /* compute a local seq matrix in each processor */
3136   /*----------------------------------------------*/
3137   /* allocate bi array and free space for accumulating nonzero column info */
3138   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3139   bi[0] = 0;
3140 
3141   /* create and initialize a linked list */
3142   nlnk = N+1;
3143   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3144 
3145   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3147   len  = ai[owners[rank+1]] - ai[owners[rank]];
3148   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3149   current_space = free_space;
3150 
3151   /* determine symbolic info for each local row */
3152   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3153   nextrow = buf_ri_k + merge->nrecv;
3154   nextai  = nextrow + merge->nrecv;
3155   for (k=0; k<merge->nrecv; k++){
3156     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3157     nrows = *buf_ri_k[k];
3158     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3159     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the start of the i-structure of the k-th received message */
3160   }
3161 
3162   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3163   len = 0;
3164   for (i=0;i<m;i++) {
3165     bnzi   = 0;
3166     /* add local non-zero cols of this proc's seqmat into lnk */
3167     arow   = owners[rank] + i;
3168     anzi   = ai[arow+1] - ai[arow];
3169     aj     = a->j + ai[arow];
3170     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3171     bnzi += nlnk;
3172     /* add received col data into lnk */
3173     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3174       if (i == *nextrow[k]) { /* i-th row */
3175         anzi = *(nextai[k]+1) - *nextai[k];
3176         aj   = buf_rj[k] + *nextai[k];
3177         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3178         bnzi += nlnk;
3179         nextrow[k]++; nextai[k]++;
3180       }
3181     }
3182     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3183 
3184     /* if free space is not available, make more free space */
3185     if (current_space->local_remaining<bnzi) {
3186       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3187       nspacedouble++;
3188     }
3189     /* copy data into free space, then initialize lnk */
3190     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3191     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3192 
3193     current_space->array           += bnzi;
3194     current_space->local_used      += bnzi;
3195     current_space->local_remaining -= bnzi;
3196 
3197     bi[i+1] = bi[i] + bnzi;
3198   }
3199 
3200   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3201 
3202   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3203   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3204   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3205 
3206   /* create symbolic parallel matrix B_mpi */
3207   /*---------------------------------------*/
3208   if (n==PETSC_DECIDE) {
3209     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,N,&B_mpi);CHKERRQ(ierr);
3210   } else {
3211     ierr = MatCreate(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,&B_mpi);CHKERRQ(ierr);
3212   }
3213   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3214   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3215   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3216 
3217   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3218   B_mpi->assembled     = PETSC_FALSE;
3219   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3220   merge->bi            = bi;
3221   merge->bj            = bj;
3222   merge->buf_ri        = buf_ri;
3223   merge->buf_rj        = buf_rj;
3224   merge->coi           = PETSC_NULL;
3225   merge->coj           = PETSC_NULL;
3226   merge->owners_co     = PETSC_NULL;
3227 
3228   /* attach the supporting struct to B_mpi for reuse */
3229   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3230   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3231   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3232   *mpimat = B_mpi;
3233 
3234   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3235   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 static PetscEvent logkey_seqstompi = 0;
3240 #undef __FUNCT__
3241 #define __FUNCT__ "MatMerge_SeqsToMPI"
3242 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3243 {
3244   PetscErrorCode   ierr;
3245 
3246   PetscFunctionBegin;
3247   if (!logkey_seqstompi) {
3248     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3249   }
3250   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3251   if (scall == MAT_INITIAL_MATRIX){
3252     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3253   }
3254   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3255   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3256   PetscFunctionReturn(0);
3257 }
3258 static PetscEvent logkey_getlocalmat = 0;
3259 #undef __FUNCT__
3260 #define __FUNCT__ "MatGetLocalMat"
3261 /*@C
3262      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (both its diagonal and off-diagonal parts)
3263 
3264     Not Collective
3265 
3266    Input Parameters:
3267 +    A - the matrix
3268 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3269 
3270    Output Parameter:
3271 .    A_loc - the local sequential matrix generated
3272 
3273     Level: developer
3274 
3275 @*/
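/*
   A minimal usage sketch (assuming A is an assembled MPIAIJ matrix; Aloc is an
   illustrative name; error checking abbreviated):

     Mat Aloc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
     ...            use the local rows of A as a SeqAIJ matrix            ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);   (refresh the values after the entries of A change)
     ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/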
3276 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3277 {
3278   PetscErrorCode  ierr;
3279   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3280   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3281   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3282   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3283   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3284   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3285 
3286   PetscFunctionBegin;
3287   if (!logkey_getlocalmat) {
3288     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3289   }
3290   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3291   if (scall == MAT_INITIAL_MATRIX){
3292     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3293     ci[0] = 0;
3294     for (i=0; i<am; i++){
3295       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3296     }
3297     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3298     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3299     k = 0;
3300     for (i=0; i<am; i++) {
3301       ncols_o = bi[i+1] - bi[i];
3302       ncols_d = ai[i+1] - ai[i];
3303       /* off-diagonal portion of A */
3304       for (jo=0; jo<ncols_o; jo++) {
3305         col = cmap[*bj];
3306         if (col >= cstart) break;
3307         cj[k]   = col; bj++;
3308         ca[k++] = *ba++;
3309       }
3310       /* diagonal portion of A */
3311       for (j=0; j<ncols_d; j++) {
3312         cj[k]   = cstart + *aj++;
3313         ca[k++] = *aa++;
3314       }
3315       /* off-diagonal portion of A */
3316       for (j=jo; j<ncols_o; j++) {
3317         cj[k]   = cmap[*bj++];
3318         ca[k++] = *ba++;
3319       }
3320     }
3321     /* put together the new matrix */
3322     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3323     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3324     /* Since these are PETSc arrays, change flags to free them as necessary. */
3325     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3326     mat->freedata = PETSC_TRUE;
3327     mat->nonew    = 0;
3328   } else if (scall == MAT_REUSE_MATRIX){
3329     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3330     ci = mat->i; cj = mat->j; ca = mat->a;
3331     for (i=0; i<am; i++) {
3332       /* off-diagonal portion of A */
3333       ncols_o = bi[i+1] - bi[i];
3334       for (jo=0; jo<ncols_o; jo++) {
3335         col = cmap[*bj];
3336         if (col >= cstart) break;
3337         *ca++ = *ba++; bj++;
3338       }
3339       /* diagonal portion of A */
3340       ncols_d = ai[i+1] - ai[i];
3341       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3342       /* off-diagonal portion of A */
3343       for (j=jo; j<ncols_o; j++) {
3344         *ca++ = *ba++; bj++;
3345       }
3346     }
3347   } else {
3348     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3349   }
3350 
3351   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3352   PetscFunctionReturn(0);
3353 }
3354 
3355 static PetscEvent logkey_getlocalmatcondensed = 0;
3356 #undef __FUNCT__
3357 #define __FUNCT__ "MatGetLocalMatCondensed"
3358 /*@C
3359      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A, keeping only the NON-ZERO columns
3360 
3361     Not Collective
3362 
3363    Input Parameters:
3364 +    A - the matrix
3365 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3366 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3367 
3368    Output Parameter:
3369 .    A_loc - the local sequential matrix generated
3370 
3371     Level: developer
3372 
3373 @*/
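/*
   A minimal usage sketch taking all local rows and condensing to the nonzero columns
   (Aloc is an illustrative name; error checking abbreviated):

     Mat Aloc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
*/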
3374 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3375 {
3376   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3377   PetscErrorCode    ierr;
3378   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3379   IS                isrowa,iscola;
3380   Mat               *aloc;
3381 
3382   PetscFunctionBegin;
3383   if (!logkey_getlocalmatcondensed) {
3384     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3385   }
3386   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3387   if (!row){
3388     start = a->rstart; end = a->rend;
3389     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3390   } else {
3391     isrowa = *row;
3392   }
3393   if (!col){
3394     start = a->cstart;
3395     cmap  = a->garray;
3396     nzA   = a->A->n;
3397     nzB   = a->B->n;
3398     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3399     ncols = 0;
3400     for (i=0; i<nzB; i++) {
3401       if (cmap[i] < start) idx[ncols++] = cmap[i];
3402       else break;
3403     }
3404     imark = i;
3405     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3406     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3407     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3408     ierr = PetscFree(idx);CHKERRQ(ierr);
3409   } else {
3410     iscola = *col;
3411   }
3412   if (scall != MAT_INITIAL_MATRIX){
3413     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3414     aloc[0] = *A_loc;
3415   }
3416   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3417   *A_loc = aloc[0];
3418   ierr = PetscFree(aloc);CHKERRQ(ierr);
3419   if (!row){
3420     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3421   }
3422   if (!col){
3423     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3424   }
3425   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3426   PetscFunctionReturn(0);
3427 }
3428 
3429 static PetscEvent logkey_GetBrowsOfAcols = 0;
3430 #undef __FUNCT__
3431 #define __FUNCT__ "MatGetBrowsOfAcols"
3432 /*@C
3433     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A
3434 
3435     Collective on Mat
3436 
3437    Input Parameters:
3438 +    A,B - the matrices in mpiaij format
3439 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3440 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3441 
3442    Output Parameter:
3443 +    rowb, colb - index sets of rows and columns of B to extract
3444 .    brstart - index of the first row of B_seq that is taken from B's own local rows
3445 -    B_seq - the sequential matrix generated
3446 
3447     Level: developer
3448 
3449 @*/
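/*
   A minimal usage sketch (variable names are illustrative; error checking abbreviated).
   The index sets created on the first call must be passed back unchanged when
   MAT_REUSE_MATRIX is used:

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      Bseq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/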
3450 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3451 {
3452   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3453   PetscErrorCode    ierr;
3454   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3455   IS                isrowb,iscolb;
3456   Mat               *bseq;
3457 
3458   PetscFunctionBegin;
3459   if (a->cstart != b->rstart || a->cend != b->rend){
3460     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3461   }
3462   if (!logkey_GetBrowsOfAcols) {
3463     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3464   }
3465   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3466 
3467   if (scall == MAT_INITIAL_MATRIX){
3468     start = a->cstart;
3469     cmap  = a->garray;
3470     nzA   = a->A->n;
3471     nzB   = a->B->n;
3472     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3473     ncols = 0;
3474     for (i=0; i<nzB; i++) {  /* row < local row index */
3475       if (cmap[i] < start) idx[ncols++] = cmap[i];
3476       else break;
3477     }
3478     imark = i;
3479     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3480     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3481     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3482     ierr = PetscFree(idx);CHKERRQ(ierr);
3483     *brstart = imark;
3484     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3485   } else {
3486     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3487     isrowb = *rowb; iscolb = *colb;
3488     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3489     bseq[0] = *B_seq;
3490   }
3491   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3492   *B_seq = bseq[0];
3493   ierr = PetscFree(bseq);CHKERRQ(ierr);
3494   if (!rowb){
3495     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3496   } else {
3497     *rowb = isrowb;
3498   }
3499   if (!colb){
3500     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3501   } else {
3502     *colb = iscolb;
3503   }
3504   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 static PetscEvent logkey_GetBrowsOfAocols = 0;
3509 #undef __FUNCT__
3510 #define __FUNCT__ "MatGetBrowsOfAoCols"
3511 /*@C
3512     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3513     of the OFF-DIAGONAL portion of the local part of A
3514 
3515     Collective on Mat
3516 
3517    Input Parameters:
3518 +    A,B - the matrices in mpiaij format
3519 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3520 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3521 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3522 
3523    Output Parameter:
3524 .    B_oth - the sequential matrix generated
3525 
3526     Level: developer
3527 
3528 @*/
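/*
   A minimal usage sketch (variable names are illustrative; error checking abbreviated).
   startsj and bufa are filled on the first call and must be passed back for
   MAT_REUSE_MATRIX:

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         Both;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
     ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&Both);CHKERRQ(ierr);
*/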
3529 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3530 {
3531   VecScatter_MPI_General *gen_to,*gen_from;
3532   PetscErrorCode         ierr;
3533   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3534   Mat_SeqAIJ             *b_oth;
3535   VecScatter             ctx=a->Mvctx;
3536   MPI_Comm               comm=ctx->comm;
3537   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3538   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3539   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3540   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3541   MPI_Request            *rwaits,*swaits;
3542   MPI_Status             *sstatus,rstatus;
3543   PetscInt               *cols;
3544   PetscScalar            *vals;
3545   PetscMPIInt            j;
3546 
3547   PetscFunctionBegin;
3548   if (a->cstart != b->rstart || a->cend != b->rend){
3549     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3550   }
3551   if (!logkey_GetBrowsOfAocols) {
3552     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3553   }
3554   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3555   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3556 
3557   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3558   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3559   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3560   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3561   nrecvs   = gen_from->n;
3562   nsends   = gen_to->n;
3563   rwaits   = gen_from->requests;
3564   swaits   = gen_to->requests;
3565   srow     = gen_to->indices;   /* local row index to be sent */
3566   rstarts  = gen_from->starts;
3567   sstarts  = gen_to->starts;
3568   rprocs   = gen_from->procs;
3569   sprocs   = gen_to->procs;
3570   sstatus  = gen_to->sstatus;
3571 
3572   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3573   if (scall == MAT_INITIAL_MATRIX){
3574     /* i-array */
3575     /*---------*/
3576     /*  post receives */
3577     for (i=0; i<nrecvs; i++){
3578       rowlen = (PetscInt*)rvalues + rstarts[i];
3579       nrows = rstarts[i+1]-rstarts[i];
3580       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3581     }
3582 
3583     /* pack the outgoing message */
3584     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3585     rstartsj = sstartsj + nsends +1;
3586     sstartsj[0] = 0;  rstartsj[0] = 0;
3587     len = 0; /* total length of j or a array to be sent */
3588     k = 0;
3589     for (i=0; i<nsends; i++){
3590       rowlen = (PetscInt*)svalues + sstarts[i];
3591       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3592       for (j=0; j<nrows; j++) {
3593         row = srow[k] + b->rowners[rank]; /* global row idx */
3594         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3595         len += rowlen[j];
3596         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3597         k++;
3598       }
3599       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3600       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3601     }
3602     /* recvs and sends of i-array are completed */
3603     i = nrecvs;
3604     while (i--) {
3605       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3606     }
3607     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3608     /* allocate buffers for sending j and a arrays */
3609     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3610     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3611 
3612     /* create i-array of B_oth */
3613     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3614     b_othi[0] = 0;
3615     len = 0; /* total length of j or a array to be received */
3616     k = 0;
3617     for (i=0; i<nrecvs; i++){
3618       rowlen = (PetscInt*)rvalues + rstarts[i];
3619       nrows = rstarts[i+1]-rstarts[i];
3620       for (j=0; j<nrows; j++) {
3621         b_othi[k+1] = b_othi[k] + rowlen[j];
3622         len += rowlen[j]; k++;
3623       }
3624       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3625     }
3626 
3627     /* allocate space for j and a arrays of B_oth */
3628     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3629     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3630 
3631     /* j-array */
3632     /*---------*/
3633     /*  post receives of j-array */
3634     for (i=0; i<nrecvs; i++){
3635       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3636       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3637     }
3638     k = 0;
3639     for (i=0; i<nsends; i++){
3640       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3641       bufJ = bufj+sstartsj[i];
3642       for (j=0; j<nrows; j++) {
3643         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3644         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3645         for (l=0; l<ncols; l++){
3646           *bufJ++ = cols[l];
3647         }
3648         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3649       }
3650       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3651     }
3652 
3653     /* recvs and sends of j-array are completed */
3654     i = nrecvs;
3655     while (i--) {
3656       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3657     }
3658     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3659   } else if (scall == MAT_REUSE_MATRIX){
3660     sstartsj = *startsj;
3661     rstartsj = sstartsj + nsends +1;
3662     bufa     = *bufa_ptr;
3663     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3664     b_otha   = b_oth->a;
3665   } else {
3666     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3667   }
3668 
3669   /* a-array */
3670   /*---------*/
3671   /*  post receives of a-array */
3672   for (i=0; i<nrecvs; i++){
3673     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3674     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3675   }
3676   k = 0;
3677   for (i=0; i<nsends; i++){
3678     nrows = sstarts[i+1]-sstarts[i];
3679     bufA = bufa+sstartsj[i];
3680     for (j=0; j<nrows; j++) {
3681       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3682       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3683       for (l=0; l<ncols; l++){
3684         *bufA++ = vals[l];
3685       }
3686       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3687 
3688     }
3689     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3690   }
3691   /* recvs and sends of a-array are completed */
3692   i = nrecvs;
3693   while (i--) {
3694     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3695   }
3696   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3697 
3698   if (scall == MAT_INITIAL_MATRIX){
3699     /* put together the new matrix */
3700     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3701 
3702     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3703     /* Since these are PETSc arrays, change flags to free them as necessary. */
3704     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3705     b_oth->freedata = PETSC_TRUE;
3706     b_oth->nonew    = 0;
3707 
3708     ierr = PetscFree(bufj);CHKERRQ(ierr);
3709     if (!startsj || !bufa_ptr){
3710       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3711       ierr = PetscFree(bufa);CHKERRQ(ierr);
3712     } else {
3713       *startsj  = sstartsj;
3714       *bufa_ptr = bufa;
3715     }
3716   }
3717   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3718 
3719   PetscFunctionReturn(0);
3720 }
3721 
3722 /*MC
3723    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3724 
3725    Options Database Keys:
3726 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3727 
3728   Level: beginner
3729 
3730 .seealso: MatCreateMPIAIJ
3731 M*/
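/*
   A minimal sketch of selecting this type (assuming A was created with MatCreate();
   error checking abbreviated):

     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);

   or, to pick the type up from the options database (run with -mat_type mpiaij):

     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/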
3732 
3733 EXTERN_C_BEGIN
3734 #undef __FUNCT__
3735 #define __FUNCT__ "MatCreate_MPIAIJ"
3736 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3737 {
3738   Mat_MPIAIJ     *b;
3739   PetscErrorCode ierr;
3740   PetscInt       i;
3741   PetscMPIInt    size;
3742 
3743   PetscFunctionBegin;
3744   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3745 
3746   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3747   B->data         = (void*)b;
3748   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3749   B->factor       = 0;
3750   B->bs           = 1;
3751   B->assembled    = PETSC_FALSE;
3752   B->mapping      = 0;
3753 
3754   B->insertmode      = NOT_SET_VALUES;
3755   b->size            = size;
3756   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3757 
3758   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3759   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3760 
3761   /* the information in the maps duplicates the information computed below, eventually
3762      we should remove the duplicate information that is not contained in the maps */
3763   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3764   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3765 
3766   /* build local table of row and column ownerships */
3767   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3768   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3769   b->cowners = b->rowners + b->size + 2;
3770   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3771   b->rowners[0] = 0;
3772   for (i=2; i<=b->size; i++) {
3773     b->rowners[i] += b->rowners[i-1];
3774   }
3775   b->rstart = b->rowners[b->rank];
3776   b->rend   = b->rowners[b->rank+1];
3777   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3778   b->cowners[0] = 0;
3779   for (i=2; i<=b->size; i++) {
3780     b->cowners[i] += b->cowners[i-1];
3781   }
3782   b->cstart = b->cowners[b->rank];
3783   b->cend   = b->cowners[b->rank+1];
3784 
3785   /* build cache for off array entries formed */
3786   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3787   b->donotstash  = PETSC_FALSE;
3788   b->colmap      = 0;
3789   b->garray      = 0;
3790   b->roworiented = PETSC_TRUE;
3791 
3792   /* stuff used for matrix vector multiply */
3793   b->lvec      = PETSC_NULL;
3794   b->Mvctx     = PETSC_NULL;
3795 
3796   /* stuff for MatGetRow() */
3797   b->rowindices   = 0;
3798   b->rowvalues    = 0;
3799   b->getrowactive = PETSC_FALSE;
3800 
3801   /* Explicitly create 2 MATSEQAIJ matrices. */
3802   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
3803   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3804   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3805   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
3806   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3807   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3808 
3809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3810                                      "MatStoreValues_MPIAIJ",
3811                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3813                                      "MatRetrieveValues_MPIAIJ",
3814                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3816 				     "MatGetDiagonalBlock_MPIAIJ",
3817                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3819 				     "MatIsTranspose_MPIAIJ",
3820 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3822 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3823 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3825 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3826 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3828 				     "MatDiagonalScaleLocal_MPIAIJ",
3829 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 EXTERN_C_END
3833