xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision ae4341f39f6a059b178d7ae3e9237d4c396ca0fc)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
10 a slightly higher hash table cost; without it, it is not scalable (each processor
11 holds an order-N integer array) but access is fast.
12 */
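/*
   A minimal sketch of the mapping, assuming a process whose off-diagonal block B
   references the global columns {3,7,12} (so garray = {3,7,12}):

     without PETSC_USE_CTABLE (array of length mat->N, 0 means "not present"):
       colmap[3] = 1, colmap[7] = 2, colmap[12] = 3, every other entry 0
       local column of global column g:   lcol = colmap[g] - 1    (lcol < 0 if absent)

     with PETSC_USE_CTABLE (hash table keyed by global column + 1):
       PetscTableFind(aij->colmap,g+1,&lcol); lcol--;             (lcol < 0 if absent)
*/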
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
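/*
   A small worked example of what the two macros above do, assuming a row that
   currently holds columns rp = {2,5,9} with values ap = {a,b,c} (nrow = 3):

     setting (row,7,x) with INSERT_VALUES:
       the search stops at _i = 2 (the first column greater than 7), no match is
       found, so the trailing entries are shifted up one slot and the row becomes
       rp = {2,5,7,9}, ap = {a,b,x,c}, nrow = 4 (reallocating first if the row was
       already full, i.e. nrow == rmax)

     setting (row,5,x) with ADD_VALUES:
       column 5 is found at _i = 1 and ap[1] becomes b+x; nothing is inserted
*/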
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the MatSetValues_SeqAIJ_*_Private macros */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171 	    col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
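/*
   A minimal usage sketch from the caller's side (assuming a two-process run and a
   hypothetical 4x4 matrix with two rows per process; error checking with
   ierr/CHKERRQ omitted for brevity).  Entries destined for rows owned by another
   process are routed through the stash and delivered at assembly time:

      Mat         A;
      PetscMPIInt rank;
      PetscInt    row,col;
      PetscScalar v = 1.0;

      MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,2,2,4,4);
      MatSetType(A,MATMPIAIJ);
      MatMPIAIJSetPreallocation(A,2,PETSC_NULL,1,PETSC_NULL);
      row = 2*rank; col = row;                          <-- locally owned diagonal entry
      MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
      row = (row + 2) % 4;                              <-- off-process row, goes to the stash
      MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);
      MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
      MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
      MatDestroy(A);
*/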
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col --;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are either in INSERTMODE or ADDMODE */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so we must
318      also disassemble ourselves, in order that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change then we know that
321      no processor disassembled, so we can skip this step
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed by MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380 #if defined(PETSC_DEBUG)
381   PetscTruth     found = PETSC_FALSE;
382 #endif
383 
384   PetscFunctionBegin;
385   /*  first count number of contributors to each processor */
386   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
387   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
388   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
389   j = 0;
390   for (i=0; i<N; i++) {
391     if (lastidx > (idx = rows[i])) j = 0;
392     lastidx = idx;
393     for (; j<size; j++) {
394       if (idx >= owners[j] && idx < owners[j+1]) {
395         nprocs[2*j]++;
396         nprocs[2*j+1] = 1;
397         owner[i] = j;
398 #if defined(PETSC_DEBUG)
399         found = PETSC_TRUE;
400 #endif
401         break;
402       }
403     }
404 #if defined(PETSC_DEBUG)
405     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
406     found = PETSC_FALSE;
407 #endif
408   }
409   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
410 
411   /* inform other processors of number of messages and max length*/
412   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
413 
414   /* post receives:   */
415   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
416   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
417   for (i=0; i<nrecvs; i++) {
418     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
419   }
420 
421   /* do sends:
422       1) starts[i] gives the starting index in svalues for stuff going to
423          the ith processor
424   */
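  /*
      A small worked example of the packing below, assuming size = 3 and per-process
      row counts nprocs[2*i] = {2,0,3}: the prefix sums give starts = {0,2,2}, so the
      two rows owned by process 0 land in svalues[0..1] and the three rows owned by
      process 2 land in svalues[2..4]; nsends = 2 since only processes 0 and 2
      receive a message.
  */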
425   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
426   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
427   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
428   starts[0] = 0;
429   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
430   for (i=0; i<N; i++) {
431     svalues[starts[owner[i]]++] = rows[i];
432   }
433 
434   starts[0] = 0;
435   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
436   count = 0;
437   for (i=0; i<size; i++) {
438     if (nprocs[2*i+1]) {
439       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
440     }
441   }
442   ierr = PetscFree(starts);CHKERRQ(ierr);
443 
444   base = owners[rank];
445 
446   /*  wait on receives */
447   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
448   source = lens + nrecvs;
449   count  = nrecvs; slen = 0;
450   while (count) {
451     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
452     /* unpack receives into our local space */
453     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
454     source[imdex]  = recv_status.MPI_SOURCE;
455     lens[imdex]    = n;
456     slen          += n;
457     count--;
458   }
459   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
460 
461   /* move the received row indices into the list of local rows to zero */
462   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
463   count = 0;
464   for (i=0; i<nrecvs; i++) {
465     values = rvalues + i*nmax;
466     for (j=0; j<lens[i]; j++) {
467       lrows[count++] = values[j] - base;
468     }
469   }
470   ierr = PetscFree(rvalues);CHKERRQ(ierr);
471   ierr = PetscFree(lens);CHKERRQ(ierr);
472   ierr = PetscFree(owner);CHKERRQ(ierr);
473   ierr = PetscFree(nprocs);CHKERRQ(ierr);
474 
475   /* actually zap the local rows */
476   /*
477         Zero the required rows. If the "diagonal block" of the matrix
478      is square and the user wishes to set the diagonal we use separate
479      code so that MatSetValues() is not called for each diagonal entry,
480      which would allocate new memory, triggering many mallocs and slowing things down.
481 
482        Contributed by: Matthew Knepley
483   */
484   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
485   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
486   if ((diag != 0.0) && (l->A->M == l->A->N)) {
487     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
488   } else if (diag != 0.0) {
489     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
490     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
491       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
493     }
494     for (i = 0; i < slen; i++) {
495       row  = lrows[i] + rstart;
496       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
497     }
498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
500   } else {
501     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
502   }
503   ierr = PetscFree(lrows);CHKERRQ(ierr);
504 
505   /* wait on sends */
506   if (nsends) {
507     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
508     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
509     ierr = PetscFree(send_status);CHKERRQ(ierr);
510   }
511   ierr = PetscFree(send_waits);CHKERRQ(ierr);
512   ierr = PetscFree(svalues);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatMult_MPIAIJ"
519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
520 {
521   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
522   PetscErrorCode ierr;
523   PetscInt       nt;
524 
525   PetscFunctionBegin;
526   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
527   if (nt != A->n) {
528     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
529   }
530   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
531   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
532   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
533   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
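/*
   The multiply above is organized, in sketch form, as

      y_local = A * x_local + B * x_ghost

   where A is the "diagonal" block acting on the locally owned entries of x, B is
   the "off-diagonal" block acting on the ghost values gathered into a->lvec, and
   the forward scatter that fills a->lvec is overlapped with the local product A*x.
*/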
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatMultAdd_MPIAIJ"
539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
540 {
541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
546   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
547   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
548   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
555 {
556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
557   PetscErrorCode ierr;
558   PetscTruth     merged;
559 
560   PetscFunctionBegin;
561   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
562   /* do nondiagonal part */
563   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
564   if (!merged) {
565     /* send it on its way */
566     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
567     /* do local part */
568     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
569     /* receive remote parts: note this assumes the values are not actually */
570     /* added into yy until the next line */
571     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
572   } else {
573     /* do local part */
574     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
575     /* send it on its way */
576     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
577     /* values actually were received in the Begin() but we need to call this nop */
578     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
587 {
588   MPI_Comm       comm;
589   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
590   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
591   IS             Me,Notme;
592   PetscErrorCode ierr;
593   PetscInt       M,N,first,last,*notme,i;
594   PetscMPIInt    size;
595 
596   PetscFunctionBegin;
597 
598   /* Easy test: the diagonal block of B must be the transpose of the diagonal block of A */
599   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
600   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
601   if (!*f) PetscFunctionReturn(0);
602   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
604   if (size == 1) PetscFunctionReturn(0);
605 
606   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
607   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
608   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
609   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
610   for (i=0; i<first; i++) notme[i] = i;
611   for (i=last; i<M; i++) notme[i-last+first] = i;
612   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
613   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
614   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
615   Aoff = Aoffs[0];
616   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
617   Boff = Boffs[0];
618   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
619   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
620   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
621   ierr = ISDestroy(Me);CHKERRQ(ierr);
622   ierr = ISDestroy(Notme);CHKERRQ(ierr);
623 
624   PetscFunctionReturn(0);
625 }
626 EXTERN_C_END
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
633   PetscErrorCode ierr;
634 
635   PetscFunctionBegin;
636   /* do nondiagonal part */
637   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
638   /* send it on its way */
639   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
640   /* do local part */
641   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
642   /* receive remote parts */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   PetscErrorCode ierr;
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
670 {
671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
672   PetscErrorCode ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
676   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705 
706   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
707   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
708   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
709   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
710   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
711   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
712   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_Binary"
718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
722   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
723   PetscErrorCode    ierr;
724   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
725   int               fd;
726   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
727   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
728   PetscScalar       *column_values;
729 
730   PetscFunctionBegin;
731   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
732   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
733   nz   = A->nz + B->nz;
734   if (!rank) {
735     header[0] = MAT_FILE_COOKIE;
736     header[1] = mat->M;
737     header[2] = mat->N;
738     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
739     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
740     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
741     /* get largest number of rows any processor has */
742     rlen = mat->m;
743     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
744     for (i=1; i<size; i++) {
745       rlen = PetscMax(rlen,range[i+1] - range[i]);
746     }
747   } else {
748     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
749     rlen = mat->m;
750   }
751 
752   /* load up the local row counts */
753   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
754   for (i=0; i<mat->m; i++) {
755     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
756   }
757 
758   /* store the row lengths to the file */
759   if (!rank) {
760     MPI_Status status;
761     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
762     for (i=1; i<size; i++) {
763       rlen = range[i+1] - range[i];
764       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
765       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     }
767   } else {
768     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
769   }
770   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
771 
772   /* load up the local column indices */
773   nzmax = nz; /* 0th processor needs as much space as the largest processor needs */
774   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
775   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
776   cnt  = 0;
777   for (i=0; i<mat->m; i++) {
778     for (j=B->i[i]; j<B->i[i+1]; j++) {
779       if ( (col = garray[B->j[j]]) > cstart) break;
780       column_indices[cnt++] = col;
781     }
782     for (k=A->i[i]; k<A->i[i+1]; k++) {
783       column_indices[cnt++] = A->j[k] + cstart;
784     }
785     for (; j<B->i[i+1]; j++) {
786       column_indices[cnt++] = garray[B->j[j]];
787     }
788   }
789   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
790 
791   /* store the column indices to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
797       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
798       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
799       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
800     }
801   } else {
802     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
803     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(column_indices);CHKERRQ(ierr);
806 
807   /* load up the local column values */
808   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( garray[B->j[j]] > cstart) break;
813       column_values[cnt++] = B->a[j];
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_values[cnt++] = A->a[k];
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_values[cnt++] = B->a[j];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
823 
824   /* store the column values to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
831       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_values);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
845 {
846   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
847   PetscErrorCode    ierr;
848   PetscMPIInt       rank = aij->rank,size = aij->size;
849   PetscTruth        isdraw,iascii,isbinary;
850   PetscViewer       sviewer;
851   PetscViewerFormat format;
852 
853   PetscFunctionBegin;
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
856   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
857   if (iascii) {
858     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
859     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
860       MatInfo    info;
861       PetscTruth inodes;
862 
863       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
864       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
865       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
866       if (!inodes) {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
868 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
869       } else {
870         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
871 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
872       }
873       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
876       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
877       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
878       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       PetscInt   inodecount,inodelimit,*inodes;
882       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
883       if (inodes) {
884         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
885       } else {
886         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
887       }
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
890       PetscFunctionReturn(0);
891     }
892   } else if (isbinary) {
893     if (size == 1) {
894       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
895       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
896     } else {
897       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
898     }
899     PetscFunctionReturn(0);
900   } else if (isdraw) {
901     PetscDraw  draw;
902     PetscTruth isnull;
903     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
904     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
905   }
906 
907   if (size == 1) {
908     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
909     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
910   } else {
911     /* assemble the entire matrix onto first processor. */
912     Mat         A;
913     Mat_SeqAIJ  *Aloc;
914     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
915     PetscScalar *a;
916 
917     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
918     if (!rank) {
919       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
920     } else {
921       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
922     }
923     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
924     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
925     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
926     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
927 
928     /* copy over the A part */
929     Aloc = (Mat_SeqAIJ*)aij->A->data;
930     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931     row = aij->rstart;
932     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
933     for (i=0; i<m; i++) {
934       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
935       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936     }
937     aj = Aloc->j;
938     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
939 
940     /* copy over the B part */
941     Aloc = (Mat_SeqAIJ*)aij->B->data;
942     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row  = aij->rstart;
944     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
945     ct   = cols;
946     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950     }
951     ierr = PetscFree(ct);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954     /*
955        Every process has to participate in drawing the matrix since the graphics
956        waits are synchronized across all processors that share the PetscDraw object
957     */
958     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
961       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
964     ierr = MatDestroy(A);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatView_MPIAIJ"
971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972 {
973   PetscErrorCode ierr;
974   PetscTruth     iascii,isdraw,issocket,isbinary;
975 
976   PetscFunctionBegin;
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
978   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
980   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
981   if (iascii || isdraw || isbinary || issocket) {
982     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
983   } else {
984     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatRelax_MPIAIJ"
993 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
994 {
995   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
996   PetscErrorCode ierr;
997   Vec            bb1;
998   PetscScalar    mone=-1.0;
999 
1000   PetscFunctionBegin;
1001   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1002 
1003   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1004 
1005   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1006     if (flag & SOR_ZERO_INITIAL_GUESS) {
1007       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1008       its--;
1009     }
1010 
1011     while (its--) {
1012       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1014 
1015       /* update rhs: bb1 = bb - B*x */
1016       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1017       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1018 
1019       /* local sweep */
1020       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1021       CHKERRQ(ierr);
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1026       its--;
1027     }
1028     while (its--) {
1029       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1031 
1032       /* update rhs: bb1 = bb - B*x */
1033       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1034       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1035 
1036       /* local sweep */
1037       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1038       CHKERRQ(ierr);
1039     }
1040   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1041     if (flag & SOR_ZERO_INITIAL_GUESS) {
1042       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1043       its--;
1044     }
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else {
1058     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1059   }
1060 
1061   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
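/*
   In sketch form, each outer iteration above performs a processor-local SOR sweep
   with the off-processor coupling moved to the right-hand side:

      x_ghost <- scatter of the current x into mat->lvec
      bb1     <- bb - B * x_ghost
      x       <- SOR(A, bb1, omega, fshift, lits)    (forward, backward, or symmetric)

   so the method is block Jacobi between processes with SOR inside each block.
*/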
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1067 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1068 {
1069   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1070   Mat            A = mat->A,B = mat->B;
1071   PetscErrorCode ierr;
1072   PetscReal      isend[5],irecv[5];
1073 
1074   PetscFunctionBegin;
1075   info->block_size     = 1.0;
1076   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1077   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1078   isend[3] = info->memory;  isend[4] = info->mallocs;
1079   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1080   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1081   isend[3] += info->memory;  isend[4] += info->mallocs;
1082   if (flag == MAT_LOCAL) {
1083     info->nz_used      = isend[0];
1084     info->nz_allocated = isend[1];
1085     info->nz_unneeded  = isend[2];
1086     info->memory       = isend[3];
1087     info->mallocs      = isend[4];
1088   } else if (flag == MAT_GLOBAL_MAX) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   } else if (flag == MAT_GLOBAL_SUM) {
1096     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1097     info->nz_used      = irecv[0];
1098     info->nz_allocated = irecv[1];
1099     info->nz_unneeded  = irecv[2];
1100     info->memory       = irecv[3];
1101     info->mallocs      = irecv[4];
1102   }
1103   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1104   info->fill_ratio_needed = 0;
1105   info->factor_mallocs    = 0;
1106   info->rows_global       = (double)matin->M;
1107   info->columns_global    = (double)matin->N;
1108   info->rows_local        = (double)matin->m;
1109   info->columns_local     = (double)matin->N;
1110 
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatSetOption_MPIAIJ"
1116 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   switch (op) {
1123   case MAT_NO_NEW_NONZERO_LOCATIONS:
1124   case MAT_YES_NEW_NONZERO_LOCATIONS:
1125   case MAT_COLUMNS_UNSORTED:
1126   case MAT_COLUMNS_SORTED:
1127   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1128   case MAT_KEEP_ZEROED_ROWS:
1129   case MAT_NEW_NONZERO_LOCATION_ERR:
1130   case MAT_USE_INODES:
1131   case MAT_DO_NOT_USE_INODES:
1132   case MAT_IGNORE_ZERO_ENTRIES:
1133     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1134     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1135     break;
1136   case MAT_ROW_ORIENTED:
1137     a->roworiented = PETSC_TRUE;
1138     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1139     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1140     break;
1141   case MAT_ROWS_SORTED:
1142   case MAT_ROWS_UNSORTED:
1143   case MAT_YES_NEW_DIAGONALS:
1144     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1145     break;
1146   case MAT_COLUMN_ORIENTED:
1147     a->roworiented = PETSC_FALSE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_IGNORE_OFF_PROC_ENTRIES:
1152     a->donotstash = PETSC_TRUE;
1153     break;
1154   case MAT_NO_NEW_DIAGONALS:
1155     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1156   case MAT_SYMMETRIC:
1157   case MAT_STRUCTURALLY_SYMMETRIC:
1158   case MAT_HERMITIAN:
1159   case MAT_SYMMETRY_ETERNAL:
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     break;
1162   case MAT_NOT_SYMMETRIC:
1163   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1164   case MAT_NOT_HERMITIAN:
1165   case MAT_NOT_SYMMETRY_ETERNAL:
1166     break;
1167   default:
1168     SETERRQ(PETSC_ERR_SUP,"unknown option");
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatGetRow_MPIAIJ"
1175 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1176 {
1177   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1178   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1179   PetscErrorCode ierr;
1180   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182   PetscInt       *cmap,*idx_p;
1183 
1184   PetscFunctionBegin;
1185   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186   mat->getrowactive = PETSC_TRUE;
1187 
1188   if (!mat->rowvalues && (idx || v)) {
1189     /*
1190         allocate enough space to hold information from the longest row.
1191     */
1192     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193     PetscInt     max = 1,tmp;
1194     for (i=0; i<matin->m; i++) {
1195       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196       if (max < tmp) { max = tmp; }
1197     }
1198     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1199     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1200   }
1201 
1202   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1203   lrow = row - rstart;
1204 
1205   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206   if (!v)   {pvA = 0; pvB = 0;}
1207   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   nztot = nzA + nzB;
1211 
1212   cmap  = mat->garray;
1213   if (v  || idx) {
1214     if (nztot) {
1215       /* Sort by increasing column numbers, assuming A and B already sorted */
1216       PetscInt imark = -1;
1217       if (v) {
1218         *v = v_p = mat->rowvalues;
1219         for (i=0; i<nzB; i++) {
1220           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1221           else break;
1222         }
1223         imark = i;
1224         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1225         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1226       }
1227       if (idx) {
1228         *idx = idx_p = mat->rowindices;
1229         if (imark > -1) {
1230           for (i=0; i<imark; i++) {
1231             idx_p[i] = cmap[cworkB[i]];
1232           }
1233         } else {
1234           for (i=0; i<nzB; i++) {
1235             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1236             else break;
1237           }
1238           imark = i;
1239         }
1240         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1241         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1242       }
1243     } else {
1244       if (idx) *idx = 0;
1245       if (v)   *v   = 0;
1246     }
1247   }
1248   *nz = nztot;
1249   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1250   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
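/*
   A small example of the merge above, assuming cstart = 10, diagonal-block columns
   cworkA = {0,3}, off-diagonal columns cworkB = {0,1}, and cmap = garray = {2,15}:
   the returned row lists global columns {2,10,13,15}, i.e. the off-diagonal entries
   whose global column is below cstart, then the diagonal block shifted by cstart,
   then the remaining off-diagonal entries.
*/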
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1256 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1257 {
1258   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1259 
1260   PetscFunctionBegin;
1261   if (!aij->getrowactive) {
1262     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1263   }
1264   aij->getrowactive = PETSC_FALSE;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatNorm_MPIAIJ"
1270 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1274   PetscErrorCode ierr;
1275   PetscInt       i,j,cstart = aij->cstart;
1276   PetscReal      sum = 0.0;
1277   PetscScalar    *v;
1278 
1279   PetscFunctionBegin;
1280   if (aij->size == 1) {
1281     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1282   } else {
1283     if (type == NORM_FROBENIUS) {
1284       v = amat->a;
1285       for (i=0; i<amat->nz; i++) {
1286 #if defined(PETSC_USE_COMPLEX)
1287         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1288 #else
1289         sum += (*v)*(*v); v++;
1290 #endif
1291       }
1292       v = bmat->a;
1293       for (i=0; i<bmat->nz; i++) {
1294 #if defined(PETSC_USE_COMPLEX)
1295         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296 #else
1297         sum += (*v)*(*v); v++;
1298 #endif
1299       }
1300       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1301       *norm = sqrt(*norm);
1302     } else if (type == NORM_1) { /* max column norm */
1303       PetscReal *tmp,*tmp2;
1304       PetscInt    *jj,*garray = aij->garray;
1305       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1306       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1307       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1308       *norm = 0.0;
1309       v = amat->a; jj = amat->j;
1310       for (j=0; j<amat->nz; j++) {
1311         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1312       }
1313       v = bmat->a; jj = bmat->j;
1314       for (j=0; j<bmat->nz; j++) {
1315         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1316       }
1317       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1318       for (j=0; j<mat->N; j++) {
1319         if (tmp2[j] > *norm) *norm = tmp2[j];
1320       }
1321       ierr = PetscFree(tmp);CHKERRQ(ierr);
1322       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1323     } else if (type == NORM_INFINITY) { /* max row norm */
1324       PetscReal ntemp = 0.0;
1325       for (j=0; j<aij->A->m; j++) {
1326         v = amat->a + amat->i[j];
1327         sum = 0.0;
1328         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1329           sum += PetscAbsScalar(*v); v++;
1330         }
1331         v = bmat->a + bmat->i[j];
1332         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1333           sum += PetscAbsScalar(*v); v++;
1334         }
1335         if (sum > ntemp) ntemp = sum;
1336       }
1337       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1338     } else {
1339       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1340     }
1341   }
1342   PetscFunctionReturn(0);
1343 }
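/*
   In sketch form the parallel norms computed above are

      NORM_FROBENIUS:  sqrt( sum of |a_ij|^2 over all local entries, MPI_SUM over processes )
      NORM_1:          max_j sum_i |a_ij|   (column sums reduced with MPI_SUM, then the max)
      NORM_INFINITY:   max_i sum_j |a_ij|   (local row sums, reduced with MPI_MAX)
*/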
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatTranspose_MPIAIJ"
1347 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1348 {
1349   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1350   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1351   PetscErrorCode ierr;
1352   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1353   Mat            B;
1354   PetscScalar    *array;
1355 
1356   PetscFunctionBegin;
1357   if (!matout && M != N) {
1358     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1359   }
1360 
1361   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1362   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1363   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1364   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1365 
1366   /* copy over the A part */
1367   Aloc = (Mat_SeqAIJ*)a->A->data;
1368   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1369   row = a->rstart;
1370   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1371   for (i=0; i<m; i++) {
1372     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1373     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1374   }
1375   aj = Aloc->j;
1376   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1377 
1378   /* copy over the B part */
1379   Aloc = (Mat_SeqAIJ*)a->B->data;
1380   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1381   row  = a->rstart;
1382   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1383   ct   = cols;
1384   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1385   for (i=0; i<m; i++) {
1386     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1387     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1388   }
1389   ierr = PetscFree(ct);CHKERRQ(ierr);
1390   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   if (matout) {
1393     *matout = B;
1394   } else {
1395     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
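/*
   Example (illustrative sketch, not part of the original source): forming an explicit
   out-of-place transpose of an assembled MPIAIJ matrix A. Passing PETSC_NULL for the
   result requests the in-place variant, which, as checked above, is only allowed for
   square matrices. The matrix names are assumptions for illustration only.

.vb
     Mat At;
     ierr = MatTranspose(A,&At);CHKERRQ(ierr);
     ...
     ierr = MatDestroy(At);CHKERRQ(ierr);
.ve
*/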
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1402 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1403 {
1404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1405   Mat            a = aij->A,b = aij->B;
1406   PetscErrorCode ierr;
1407   PetscInt       s1,s2,s3;
1408 
1409   PetscFunctionBegin;
1410   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1411   if (rr) {
1412     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1413     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1414     /* Overlap communication with computation. */
1415     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1416   }
1417   if (ll) {
1418     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1419     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1420     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1421   }
1422   /* scale the diagonal block */
1423   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1424 
1425   if (rr) {
1426     /* Do a scatter end and then right scale the off-diagonal block */
1427     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1428     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1429   }
1430 
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1437 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1438 {
1439   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443   if (!a->rank) {
1444     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1445   }
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1451 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1452 {
1453   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1454   PetscErrorCode ierr;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1458   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1463 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1464 {
1465   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1466   PetscErrorCode ierr;
1467 
1468   PetscFunctionBegin;
1469   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatEqual_MPIAIJ"
1475 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1476 {
1477   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1478   Mat            a,b,c,d;
1479   PetscTruth     flg;
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   a = matA->A; b = matA->B;
1484   c = matB->A; d = matB->B;
1485 
1486   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1487   if (flg) {
1488     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1489   }
1490   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatCopy_MPIAIJ"
1496 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1497 {
1498   PetscErrorCode ierr;
1499   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1500   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1501 
1502   PetscFunctionBegin;
1503   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1504   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1505     /* because of the column compression in the off-processor part of the matrix a->B,
1506        the number of columns in a->B and b->B may be different, hence we cannot call
1507        the MatCopy() directly on the two parts. If need be, we can provide a more
1508        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
1509        then copying the submatrices */
1510     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1511   } else {
1512     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1513     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1520 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1521 {
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #include "petscblaslapack.h"
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatAXPY_MPIAIJ"
1532 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1533 {
1534   PetscErrorCode ierr;
1535   PetscInt       i;
1536   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1537   PetscBLASInt   bnz,one=1;
1538   Mat_SeqAIJ     *x,*y;
1539 
1540   PetscFunctionBegin;
1541   if (str == SAME_NONZERO_PATTERN) {
1542     PetscScalar alpha = a;
1543     x = (Mat_SeqAIJ *)xx->A->data;
1544     y = (Mat_SeqAIJ *)yy->A->data;
1545     bnz = (PetscBLASInt)x->nz;
1546     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1547     x = (Mat_SeqAIJ *)xx->B->data;
1548     y = (Mat_SeqAIJ *)yy->B->data;
1549     bnz = (PetscBLASInt)x->nz;
1550     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1551   } else if (str == SUBSET_NONZERO_PATTERN) {
1552     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1553 
1554     x = (Mat_SeqAIJ *)xx->B->data;
1555     y = (Mat_SeqAIJ *)yy->B->data;
1556     if (y->xtoy && y->XtoY != xx->B) {
1557       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1558       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1559     }
1560     if (!y->xtoy) { /* get xtoy */
1561       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1562       y->XtoY = xx->B;
1563     }
1564     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1565   } else {
1566     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
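/*
   Example (illustrative sketch, not part of the original source): using MatAXPY() to
   compute Y = Y + a*X for two MPIAIJ matrices. SAME_NONZERO_PATTERN takes the fast
   BLAS path above, SUBSET_NONZERO_PATTERN uses the xtoy map, and any other structure
   falls back to MatAXPY_Basic(). The matrices X and Y are assumptions for illustration.

.vb
     PetscScalar a = 2.0;
     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
.ve
*/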
1570 
1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatConjugate_MPIAIJ"
1575 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1576 {
1577 #if defined(PETSC_USE_COMPLEX)
1578   PetscErrorCode ierr;
1579   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1583   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1584 #else
1585   PetscFunctionBegin;
1586 #endif
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* -------------------------------------------------------------------*/
1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1592        MatGetRow_MPIAIJ,
1593        MatRestoreRow_MPIAIJ,
1594        MatMult_MPIAIJ,
1595 /* 4*/ MatMultAdd_MPIAIJ,
1596        MatMultTranspose_MPIAIJ,
1597        MatMultTransposeAdd_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601 /*10*/ 0,
1602        0,
1603        0,
1604        MatRelax_MPIAIJ,
1605        MatTranspose_MPIAIJ,
1606 /*15*/ MatGetInfo_MPIAIJ,
1607        MatEqual_MPIAIJ,
1608        MatGetDiagonal_MPIAIJ,
1609        MatDiagonalScale_MPIAIJ,
1610        MatNorm_MPIAIJ,
1611 /*20*/ MatAssemblyBegin_MPIAIJ,
1612        MatAssemblyEnd_MPIAIJ,
1613        0,
1614        MatSetOption_MPIAIJ,
1615        MatZeroEntries_MPIAIJ,
1616 /*25*/ MatZeroRows_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620        0,
1621 /*30*/ MatSetUpPreallocation_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        0,
1626 /*35*/ MatDuplicate_MPIAIJ,
1627        0,
1628        0,
1629        0,
1630        0,
1631 /*40*/ MatAXPY_MPIAIJ,
1632        MatGetSubMatrices_MPIAIJ,
1633        MatIncreaseOverlap_MPIAIJ,
1634        MatGetValues_MPIAIJ,
1635        MatCopy_MPIAIJ,
1636 /*45*/ MatPrintHelp_MPIAIJ,
1637        MatScale_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641 /*50*/ MatSetBlockSize_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646 /*55*/ MatFDColoringCreate_MPIAIJ,
1647        0,
1648        MatSetUnfactored_MPIAIJ,
1649        0,
1650        0,
1651 /*60*/ MatGetSubMatrix_MPIAIJ,
1652        MatDestroy_MPIAIJ,
1653        MatView_MPIAIJ,
1654        MatGetPetscMaps_Petsc,
1655        0,
1656 /*65*/ 0,
1657        0,
1658        0,
1659        0,
1660        0,
1661 /*70*/ 0,
1662        0,
1663        MatSetColoring_MPIAIJ,
1664 #if defined(PETSC_HAVE_ADIC)
1665        MatSetValuesAdic_MPIAIJ,
1666 #else
1667        0,
1668 #endif
1669        MatSetValuesAdifor_MPIAIJ,
1670 /*75*/ 0,
1671        0,
1672        0,
1673        0,
1674        0,
1675 /*80*/ 0,
1676        0,
1677        0,
1678        0,
1679 /*84*/ MatLoad_MPIAIJ,
1680        0,
1681        0,
1682        0,
1683        0,
1684        0,
1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1688        MatPtAP_Basic,
1689        MatPtAPSymbolic_MPIAIJ,
1690 /*95*/ MatPtAPNumeric_MPIAIJ,
1691        0,
1692        0,
1693        0,
1694        0,
1695 /*100*/0,
1696        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1697        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1698        MatConjugate_MPIAIJ
1699 };
1700 
1701 /* ----------------------------------------------------------------------------------------*/
1702 
1703 EXTERN_C_BEGIN
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1706 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1707 {
1708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1713   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 EXTERN_C_END
1717 
1718 EXTERN_C_BEGIN
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1721 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1722 {
1723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1728   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 EXTERN_C_END
1732 
1733 #include "petscpc.h"
1734 EXTERN_C_BEGIN
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1737 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1738 {
1739   Mat_MPIAIJ     *b;
1740   PetscErrorCode ierr;
1741   PetscInt       i;
1742 
1743   PetscFunctionBegin;
1744   B->preallocated = PETSC_TRUE;
1745   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1746   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1747   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1748   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1749   if (d_nnz) {
1750     for (i=0; i<B->m; i++) {
1751       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1752     }
1753   }
1754   if (o_nnz) {
1755     for (i=0; i<B->m; i++) {
1756       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1757     }
1758   }
1759   b = (Mat_MPIAIJ*)B->data;
1760   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1761   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1762 
1763   PetscFunctionReturn(0);
1764 }
1765 EXTERN_C_END
1766 
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1770 {
1771   Mat            mat;
1772   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   *newmat       = 0;
1777   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1778   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1779   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1780   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1781   a    = (Mat_MPIAIJ*)mat->data;
1782 
1783   mat->factor       = matin->factor;
1784   mat->bs           = matin->bs;
1785   mat->assembled    = PETSC_TRUE;
1786   mat->insertmode   = NOT_SET_VALUES;
1787   mat->preallocated = PETSC_TRUE;
1788 
1789   a->rstart       = oldmat->rstart;
1790   a->rend         = oldmat->rend;
1791   a->cstart       = oldmat->cstart;
1792   a->cend         = oldmat->cend;
1793   a->size         = oldmat->size;
1794   a->rank         = oldmat->rank;
1795   a->donotstash   = oldmat->donotstash;
1796   a->roworiented  = oldmat->roworiented;
1797   a->rowindices   = 0;
1798   a->rowvalues    = 0;
1799   a->getrowactive = PETSC_FALSE;
1800 
1801   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1802   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1803   if (oldmat->colmap) {
1804 #if defined (PETSC_USE_CTABLE)
1805     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1806 #else
1807     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1808     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1809     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1810 #endif
1811   } else a->colmap = 0;
1812   if (oldmat->garray) {
1813     PetscInt len;
1814     len  = oldmat->B->n;
1815     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1816     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1817     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1818   } else a->garray = 0;
1819 
1820   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1821   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1822   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1823   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1824   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1825   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1826   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1827   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1828   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1829   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1830   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1831   *newmat = mat;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #include "petscsys.h"
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatLoad_MPIAIJ"
1839 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1840 {
1841   Mat            A;
1842   PetscScalar    *vals,*svals;
1843   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1844   MPI_Status     status;
1845   PetscErrorCode ierr;
1846   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1847   PetscInt       i,nz,j,rstart,rend,mmax;
1848   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1849   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1850   PetscInt       cend,cstart,n,*rowners;
1851   int            fd;
1852 
1853   PetscFunctionBegin;
1854   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1855   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1856   if (!rank) {
1857     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1858     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1859     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
1860   }
1861 
1862   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1863   M = header[1]; N = header[2];
1864   /* determine ownership of all rows */
1865   m    = M/size + ((M % size) > rank);
1866   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1867   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1868 
1869   /* First process needs enough room for process with most rows */
1870   if (!rank) {
1871     mmax       = rowners[1];
1872     for (i=2; i<size; i++) {
1873       mmax = PetscMax(mmax,rowners[i]);
1874     }
1875   } else mmax = m;
1876 
1877   rowners[0] = 0;
1878   for (i=2; i<=size; i++) {
1879     mmax       = PetscMax(mmax,rowners[i]);
1880     rowners[i] += rowners[i-1];
1881   }
1882   rstart = rowners[rank];
1883   rend   = rowners[rank+1];
1884 
1885   /* distribute row lengths to all processors */
1886   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1887   if (!rank) {
1888     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1889     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1890     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1891     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1892     for (j=0; j<m; j++) {
1893       procsnz[0] += ourlens[j];
1894     }
1895     for (i=1; i<size; i++) {
1896       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1897       /* calculate the number of nonzeros on each processor */
1898       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1899         procsnz[i] += rowlengths[j];
1900       }
1901       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1902     }
1903     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1904   } else {
1905     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1906   }
1907 
1908   if (!rank) {
1909     /* determine max buffer needed and allocate it */
1910     maxnz = 0;
1911     for (i=0; i<size; i++) {
1912       maxnz = PetscMax(maxnz,procsnz[i]);
1913     }
1914     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1915 
1916     /* read in my part of the matrix column indices  */
1917     nz   = procsnz[0];
1918     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1919     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1920 
1921     /* read in everyone else's and ship off */
1922     for (i=1; i<size; i++) {
1923       nz   = procsnz[i];
1924       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1925       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1926     }
1927     ierr = PetscFree(cols);CHKERRQ(ierr);
1928   } else {
1929     /* determine buffer space needed for message */
1930     nz = 0;
1931     for (i=0; i<m; i++) {
1932       nz += ourlens[i];
1933     }
1934     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1935 
1936     /* receive message of column indices */
1937     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1938     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1939     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1940   }
1941 
1942   /* determine column ownership if matrix is not square */
1943   if (N != M) {
1944     n      = N/size + ((N % size) > rank);
1945     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1946     cstart = cend - n;
1947   } else {
1948     cstart = rstart;
1949     cend   = rend;
1950     n      = cend - cstart;
1951   }
1952 
1953   /* loop over local rows, determining number of off diagonal entries */
1954   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1955   jj = 0;
1956   for (i=0; i<m; i++) {
1957     for (j=0; j<ourlens[i]; j++) {
1958       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1959       jj++;
1960     }
1961   }
1962 
1963   /* create our matrix */
1964   for (i=0; i<m; i++) {
1965     ourlens[i] -= offlens[i];
1966   }
1967   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1968   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
1969   ierr = MatSetType(A,type);CHKERRQ(ierr);
1970   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1971 
1972   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1973   for (i=0; i<m; i++) {
1974     ourlens[i] += offlens[i];
1975   }
1976 
1977   if (!rank) {
1978     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1979 
1980     /* read in my part of the matrix numerical values  */
1981     nz   = procsnz[0];
1982     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1983 
1984     /* insert into matrix */
1985     jj      = rstart;
1986     smycols = mycols;
1987     svals   = vals;
1988     for (i=0; i<m; i++) {
1989       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1990       smycols += ourlens[i];
1991       svals   += ourlens[i];
1992       jj++;
1993     }
1994 
1995     /* read in other processors and ship out */
1996     for (i=1; i<size; i++) {
1997       nz   = procsnz[i];
1998       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1999       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2000     }
2001     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2002   } else {
2003     /* receive numeric values */
2004     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2005 
2006     /* receive message of values */
2007     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2008     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2009     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2010 
2011     /* insert into matrix */
2012     jj      = rstart;
2013     smycols = mycols;
2014     svals   = vals;
2015     for (i=0; i<m; i++) {
2016       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2017       smycols += ourlens[i];
2018       svals   += ourlens[i];
2019       jj++;
2020     }
2021   }
2022   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2023   ierr = PetscFree(vals);CHKERRQ(ierr);
2024   ierr = PetscFree(mycols);CHKERRQ(ierr);
2025   ierr = PetscFree(rowners);CHKERRQ(ierr);
2026 
2027   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   *newmat = A;
2030   PetscFunctionReturn(0);
2031 }
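/*
   Example (illustrative sketch, not part of the original source): loading a parallel AIJ
   matrix from a PETSc binary file with the routine above. The file name is an assumption
   for illustration; the read-mode constant is assumed to follow the PETSC_FILE_* naming
   used elsewhere in this file.

.vb
     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/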
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2035 /*
2036     Not great since it makes two copies of the submatrix: first a SeqAIJ
2037   on each process, and then the end result by concatenating the local matrices.
2038   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2039 */
2040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2041 {
2042   PetscErrorCode ierr;
2043   PetscMPIInt    rank,size;
2044   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2045   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2046   Mat            *local,M,Mreuse;
2047   PetscScalar    *vwork,*aa;
2048   MPI_Comm       comm = mat->comm;
2049   Mat_SeqAIJ     *aij;
2050 
2051 
2052   PetscFunctionBegin;
2053   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2055 
2056   if (call ==  MAT_REUSE_MATRIX) {
2057     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2058     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2059     local = &Mreuse;
2060     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2061   } else {
2062     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2063     Mreuse = *local;
2064     ierr   = PetscFree(local);CHKERRQ(ierr);
2065   }
2066 
2067   /*
2068       m - number of local rows
2069       n - number of columns (same on all processors)
2070       rstart - first row in new global matrix generated
2071   */
2072   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2073   if (call == MAT_INITIAL_MATRIX) {
2074     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2075     ii  = aij->i;
2076     jj  = aij->j;
2077 
2078     /*
2079         Determine the number of non-zeros in the diagonal and off-diagonal
2080         portions of the matrix in order to do correct preallocation
2081     */
2082 
2083     /* first get start and end of "diagonal" columns */
2084     if (csize == PETSC_DECIDE) {
2085       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2086       if (mglobal == n) { /* square matrix */
2087 	nlocal = m;
2088       } else {
2089         nlocal = n/size + ((n % size) > rank);
2090       }
2091     } else {
2092       nlocal = csize;
2093     }
2094     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2095     rstart = rend - nlocal;
2096     if (rank == size - 1 && rend != n) {
2097       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2098     }
2099 
2100     /* next, compute all the lengths */
2101     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2102     olens = dlens + m;
2103     for (i=0; i<m; i++) {
2104       jend = ii[i+1] - ii[i];
2105       olen = 0;
2106       dlen = 0;
2107       for (j=0; j<jend; j++) {
2108         if (*jj < rstart || *jj >= rend) olen++;
2109         else dlen++;
2110         jj++;
2111       }
2112       olens[i] = olen;
2113       dlens[i] = dlen;
2114     }
2115     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2116     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2117     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2118     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2119     ierr = PetscFree(dlens);CHKERRQ(ierr);
2120   } else {
2121     PetscInt ml,nl;
2122 
2123     M = *newmat;
2124     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2125     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2126     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2127     /*
2128          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2129        rather than the slower MatSetValues().
2130     */
2131     M->was_assembled = PETSC_TRUE;
2132     M->assembled     = PETSC_FALSE;
2133   }
2134   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2135   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2136   ii  = aij->i;
2137   jj  = aij->j;
2138   aa  = aij->a;
2139   for (i=0; i<m; i++) {
2140     row   = rstart + i;
2141     nz    = ii[i+1] - ii[i];
2142     cwork = jj;     jj += nz;
2143     vwork = aa;     aa += nz;
2144     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2145   }
2146 
2147   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   *newmat = M;
2150 
2151   /* save the submatrix used on this processor for the next request */
2152   if (call ==  MAT_INITIAL_MATRIX) {
2153     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2154     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2155   }
2156 
2157   PetscFunctionReturn(0);
2158 }
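/*
   Example (illustrative sketch, not part of the original source): extracting a parallel
   submatrix and later refilling it with updated values from the same index sets. The
   index sets and sizes are assumptions for illustration; csize = PETSC_DECIDE lets the
   routine pick the local column layout as described above.

.vb
     IS       isrow,iscol;
     Mat      S;
     PetscInt nloc,first;
     ...   choose nloc local rows/columns starting at global index first   ...
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ...
     ierr = MatGetSubMatrix(A,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
.ve
*/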
2159 
2160 EXTERN_C_BEGIN
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2164 {
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2166   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2167   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2168   const PetscInt *JJ;
2169   PetscScalar    *values;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173   if (I[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"I[0] must be 0, it is %D",I[0]);
2174   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2175   o_nnz = d_nnz + m;
2176 
2177   for (i=0; i<m; i++) {
2178     nnz     = I[i+1]- I[i];
2179     JJ      = J + I[i];
2180     nnz_max = PetscMax(nnz_max,nnz);
2181     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
2182     for (j=0; j<nnz; j++) {
2183       if (*JJ >= cstart) break;
2184       JJ++;
2185     }
2186     d = 0;
2187     for (; j<nnz; j++) {
2188       if (*JJ++ >= cend) break;
2189       d++;
2190     }
2191     d_nnz[i] = d;
2192     o_nnz[i] = nnz - d;
2193   }
2194   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2195   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2196 
2197   if (v) values = (PetscScalar*)v;
2198   else {
2199     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2200     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2201   }
2202 
2203   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2204   for (i=0; i<m; i++) {
2205     ii   = i + rstart;
2206     nnz  = I[i+1]- I[i];
2207     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2208   }
2209   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2210   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2212 
2213   if (!v) {
2214     ierr = PetscFree(values);CHKERRQ(ierr);
2215   }
2216   PetscFunctionReturn(0);
2217 }
2218 EXTERN_C_END
2219 
2220 #undef __FUNCT__
2221 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2222 /*@C
2223    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2224    (the default parallel PETSc format).
2225 
2226    Collective on MPI_Comm
2227 
2228    Input Parameters:
2229 +  B - the matrix
2230 .  i - the indices into j for the start of each local row (starts with zero)
2231 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2232 -  v - optional values in the matrix
2233 
2234    Level: developer
2235 
2236 .keywords: matrix, aij, compressed row, sparse, parallel
2237 
2238 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2239 @*/
2240 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2241 {
2242   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2243 
2244   PetscFunctionBegin;
2245   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2246   if (f) {
2247     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2248   }
2249   PetscFunctionReturn(0);
2250 }
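/*
   Example (illustrative sketch, not part of the original source): preallocating and
   filling an MPIAIJ matrix from local CSR arrays with the routine documented below.
   Each process passes the row pointers, sorted global column indices, and values of its
   own rows only; i[0] must be 0 as checked above. The 2x4 local block is an assumption
   for illustration.

.vb
     Mat         B;
     PetscInt    i[] = {0,2,4};
     PetscInt    j[] = {0,1,1,3};
     PetscScalar v[] = {1.0,2.0,3.0,4.0};
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,4);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
*/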
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2254 /*@C
2255    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2256    (the default parallel PETSc format).  For good matrix assembly performance
2257    the user should preallocate the matrix storage by setting the parameters
2258    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2259    performance can be increased by more than a factor of 50.
2260 
2261    Collective on MPI_Comm
2262 
2263    Input Parameters:
2264 +  A - the matrix
2265 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2266            (same value is used for all local rows)
2267 .  d_nnz - array containing the number of nonzeros in the various rows of the
2268            DIAGONAL portion of the local submatrix (possibly different for each row)
2269            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2270            The size of this array is equal to the number of local rows, i.e 'm'.
2271            You must leave room for the diagonal entry even if it is zero.
2272 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2273            submatrix (same value is used for all local rows).
2274 -  o_nnz - array containing the number of nonzeros in the various rows of the
2275            OFF-DIAGONAL portion of the local submatrix (possibly different for
2276            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2277            structure. The size of this array is equal to the number
2278            of local rows, i.e 'm'.
2279 
2280    If the *_nnz parameter is given then the *_nz parameter is ignored
2281 
2282    The AIJ format (also called the Yale sparse matrix format or
2283    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2284    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2285 
2286    The parallel matrix is partitioned such that the first m0 rows belong to
2287    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2288    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2289 
2290    The DIAGONAL portion of the local submatrix of a processor can be defined
2291    as the submatrix which is obtained by extracting the part corresponding
2292    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2293    first row that belongs to the processor, and r2 is the last row belonging
2294    to this processor. This is a square mxm matrix. The remaining portion
2295    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2296 
2297    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2298 
2299    Example usage:
2300 
2301    Consider the following 8x8 matrix with 34 non-zero values, that is
2302    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2303    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2304    as follows:
2305 
2306 .vb
2307             1  2  0  |  0  3  0  |  0  4
2308     Proc0   0  5  6  |  7  0  0  |  8  0
2309             9  0 10  | 11  0  0  | 12  0
2310     -------------------------------------
2311            13  0 14  | 15 16 17  |  0  0
2312     Proc1   0 18  0  | 19 20 21  |  0  0
2313             0  0  0  | 22 23  0  | 24  0
2314     -------------------------------------
2315     Proc2  25 26 27  |  0  0 28  | 29  0
2316            30  0  0  | 31 32 33  |  0 34
2317 .ve
2318 
2319    This can be represented as a collection of submatrices as:
2320 
2321 .vb
2322       A B C
2323       D E F
2324       G H I
2325 .ve
2326 
2327    Where the submatrices A,B,C are owned by proc0, D,E,F are
2328    owned by proc1, G,H,I are owned by proc2.
2329 
2330    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2331    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2332    The 'M','N' parameters are 8,8, and have the same values on all procs.
2333 
2334    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2335    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2336    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2337    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2338    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2339    matrix, and [DF] as another SeqAIJ matrix.
2340 
2341    When d_nz, o_nz parameters are specified, d_nz storage elements are
2342    allocated for every row of the local diagonal submatrix, and o_nz
2343    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2344    One way to choose d_nz and o_nz is to use the max nonzeros per local
2345    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2346    In this case, the values of d_nz,o_nz are:
2347 .vb
2348      proc0 : dnz = 2, o_nz = 2
2349      proc1 : dnz = 3, o_nz = 2
2350      proc2 : dnz = 1, o_nz = 4
2351 .ve
2352    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2353    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2354    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2355    34 values.
2356 
2357    When d_nnz, o_nnz parameters are specified, the storage is specified
2358    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2359    In the above case the values for d_nnz,o_nnz are:
2360 .vb
2361      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2362      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2363      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2364 .ve
2365    Here the space allocated is the sum of all of the above values, i.e. 34, and
2366    hence pre-allocation is perfect.
2367 
2368    Level: intermediate
2369 
2370 .keywords: matrix, aij, compressed row, sparse, parallel
2371 
2372 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2373           MPIAIJ
2374 @*/
2375 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2376 {
2377   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2381   if (f) {
2382     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2383   }
2384   PetscFunctionReturn(0);
2385 }
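/*
   Example (illustrative sketch, not part of the original source): the create/set-type/
   preallocate workflow using the d_nnz and o_nnz values from the 8x8 example in the
   man page above, written out for proc0 (3 local rows); the other processes would pass
   their own arrays analogously.

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/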
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatCreateMPIAIJ"
2389 /*@C
2390    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2391    (the default parallel PETSc format).  For good matrix assembly performance
2392    the user should preallocate the matrix storage by setting the parameters
2393    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2394    performance can be increased by more than a factor of 50.
2395 
2396    Collective on MPI_Comm
2397 
2398    Input Parameters:
2399 +  comm - MPI communicator
2400 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2401            This value should be the same as the local size used in creating the
2402            y vector for the matrix-vector product y = Ax.
2403 .  n - This value should be the same as the local size used in creating the
2404        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2405        calculated if N is given) For square matrices n is almost always m.
2406 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2407 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2408 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2409            (same value is used for all local rows)
2410 .  d_nnz - array containing the number of nonzeros in the various rows of the
2411            DIAGONAL portion of the local submatrix (possibly different for each row)
2412            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2413            The size of this array is equal to the number of local rows, i.e 'm'.
2414            You must leave room for the diagonal entry even if it is zero.
2415 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2416            submatrix (same value is used for all local rows).
2417 -  o_nnz - array containing the number of nonzeros in the various rows of the
2418            OFF-DIAGONAL portion of the local submatrix (possibly different for
2419            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2420            structure. The size of this array is equal to the number
2421            of local rows, i.e 'm'.
2422 
2423    Output Parameter:
2424 .  A - the matrix
2425 
2426    Notes:
2427    If the *_nnz parameter is given then the *_nz parameter is ignored
2428 
2429    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2430    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2431    storage requirements for this matrix.
2432 
2433    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2434    processor then it must be used on all processors that share the object for
2435    that argument.
2436 
2437    The user MUST specify either the local or global matrix dimensions
2438    (possibly both).
2439 
2440    The parallel matrix is partitioned such that the first m0 rows belong to
2441    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2442    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
2443 
2444    The DIAGONAL portion of the local submatrix of a processor can be defined
2445    as the submatrix which is obtained by extracting the part corresponding
2446    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2447    first row that belongs to the processor, and r2 is the last row belonging
2448    to this processor. This is a square mxm matrix. The remaining portion
2449    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2450 
2451    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2452 
2453    When calling this routine with a single process communicator, a matrix of
2454    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2455    type of communicator, use the construction mechanism:
2456      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2457 
2458    By default, this format uses inodes (identical nodes) when possible.
2459    We search for consecutive rows with the same nonzero structure, thereby
2460    reusing matrix information to achieve increased efficiency.
2461 
2462    Options Database Keys:
2463 +  -mat_no_inode  - Do not use inodes
2464 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2465 -  -mat_aij_oneindex - Internally use indexing starting at 1
2466         rather than 0.  Note that when calling MatSetValues(),
2467         the user still MUST index entries starting at 0!
2468 
2469 
2470    Example usage:
2471 
2472    Consider the following 8x8 matrix with 34 non-zero values, that is
2473    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2474    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2475    as follows:
2476 
2477 .vb
2478             1  2  0  |  0  3  0  |  0  4
2479     Proc0   0  5  6  |  7  0  0  |  8  0
2480             9  0 10  | 11  0  0  | 12  0
2481     -------------------------------------
2482            13  0 14  | 15 16 17  |  0  0
2483     Proc1   0 18  0  | 19 20 21  |  0  0
2484             0  0  0  | 22 23  0  | 24  0
2485     -------------------------------------
2486     Proc2  25 26 27  |  0  0 28  | 29  0
2487            30  0  0  | 31 32 33  |  0 34
2488 .ve
2489 
2490    This can be represented as a collection of submatrices as:
2491 
2492 .vb
2493       A B C
2494       D E F
2495       G H I
2496 .ve
2497 
2498    Where the submatrices A,B,C are owned by proc0, D,E,F are
2499    owned by proc1, G,H,I are owned by proc2.
2500 
2501    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2502    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2503    The 'M','N' parameters are 8,8, and have the same values on all procs.
2504 
2505    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2506    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2507    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2508    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2509    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2510    matrix, and [DF] as another SeqAIJ matrix.
2511 
2512    When d_nz, o_nz parameters are specified, d_nz storage elements are
2513    allocated for every row of the local diagonal submatrix, and o_nz
2514    storage locations are allocated for every row of the OFF-DIAGONAL submat.
2515    One way to choose d_nz and o_nz is to use the max nonzeros per local
2516    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
2517    In this case, the values of d_nz,o_nz are:
2518 .vb
2519      proc0 : dnz = 2, o_nz = 2
2520      proc1 : dnz = 3, o_nz = 2
2521      proc2 : dnz = 1, o_nz = 4
2522 .ve
2523    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2524    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2525    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2526    34 values.
2527 
2528    When d_nnz, o_nnz parameters are specified, the storage is specified
2529    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2530    In the above case the values for d_nnz,o_nnz are:
2531 .vb
2532      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2533      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2534      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2535 .ve
2536    Here the space allocated is the sum of all of the above values, i.e. 34, and
2537    hence pre-allocation is perfect.
2538 
2539    Level: intermediate
2540 
2541 .keywords: matrix, aij, compressed row, sparse, parallel
2542 
2543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2544           MPIAIJ
2545 @*/
2546 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2547 {
2548   PetscErrorCode ierr;
2549   PetscMPIInt    size;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2553   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2554   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2555   if (size > 1) {
2556     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2557     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2558   } else {
2559     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2560     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2561   }
2562   PetscFunctionReturn(0);
2563 }
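/*
   Example (illustrative sketch, not part of the original source): the one-call
   equivalent of MatCreate()/MatSetType()/MatMPIAIJSetPreallocation() for the 8x8
   example in the man page above, again shown for proc0.

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
*/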
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2567 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2568 {
2569   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2570 
2571   PetscFunctionBegin;
2572   *Ad     = a->A;
2573   *Ao     = a->B;
2574   *colmap = a->garray;
2575   PetscFunctionReturn(0);
2576 }
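/*
   Example (illustrative sketch, not part of the original source): accessing the two
   sequential blocks of an MPIAIJ matrix with the routine above. Ad holds the diagonal
   block, Ao the column-compressed off-diagonal block, and colmap[k] is the global
   column corresponding to local column k of Ao (the garray returned above). The loop
   and variable names are assumptions for illustration.

.vb
     Mat      Ad,Ao;
     PetscInt *colmap,k,mB,nB;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
     ierr = MatGetLocalSize(Ao,&mB,&nB);CHKERRQ(ierr);
     for (k=0; k<nB; k++) {
       ierr = PetscPrintf(PETSC_COMM_SELF,"local column %D is global column %D\n",k,colmap[k]);CHKERRQ(ierr);
     }
.ve
*/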
2577 
2578 #undef __FUNCT__
2579 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2580 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2581 {
2582   PetscErrorCode ierr;
2583   PetscInt       i;
2584   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2585 
2586   PetscFunctionBegin;
2587   if (coloring->ctype == IS_COLORING_LOCAL) {
2588     ISColoringValue *allcolors,*colors;
2589     ISColoring      ocoloring;
2590 
2591     /* set coloring for diagonal portion */
2592     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2593 
2594     /* set coloring for off-diagonal portion */
2595     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2596     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2597     for (i=0; i<a->B->n; i++) {
2598       colors[i] = allcolors[a->garray[i]];
2599     }
2600     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2601     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2602     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2603     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2604   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2605     ISColoringValue *colors;
2606     PetscInt        *larray;
2607     ISColoring      ocoloring;
2608 
2609     /* set coloring for diagonal portion */
2610     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2611     for (i=0; i<a->A->n; i++) {
2612       larray[i] = i + a->cstart;
2613     }
2614     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2615     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2616     for (i=0; i<a->A->n; i++) {
2617       colors[i] = coloring->colors[larray[i]];
2618     }
2619     ierr = PetscFree(larray);CHKERRQ(ierr);
2620     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2621     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2622     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2623 
2624     /* set coloring for off-diagonal portion */
2625     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2626     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2627     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2628     for (i=0; i<a->B->n; i++) {
2629       colors[i] = coloring->colors[larray[i]];
2630     }
2631     ierr = PetscFree(larray);CHKERRQ(ierr);
2632     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2633     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2634     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2635   } else {
2636     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2637   }
2638 
2639   PetscFunctionReturn(0);
2640 }
2641 
2642 #if defined(PETSC_HAVE_ADIC)
2643 #undef __FUNCT__
2644 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2645 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2646 {
2647   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2648   PetscErrorCode ierr;
2649 
2650   PetscFunctionBegin;
2651   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2652   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2653   PetscFunctionReturn(0);
2654 }
2655 #endif
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2659 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2660 {
2661   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2662   PetscErrorCode ierr;
2663 
2664   PetscFunctionBegin;
2665   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2666   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatMerge"
2672 /*@C
2673       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2674                  matrices from each processor
2675 
2676     Collective on MPI_Comm
2677 
2678    Input Parameters:
2679 +    comm - the communicator the parallel matrix will live on
2680 .    inmat - the input sequential matrices
2681 .    n - number of local columns (or PETSC_DECIDE)
2682 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2683 
2684    Output Parameter:
2685 .    outmat - the parallel matrix generated
2686 
2687     Level: advanced
2688 
2689    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2690 
2691 @*/
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2693 {
2694   PetscErrorCode ierr;
2695   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2696   PetscInt       *indx;
2697   PetscScalar    *values;
2698   PetscMap       columnmap,rowmap;
2699 
2700   PetscFunctionBegin;
2701   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2702   /*
2703   PetscMPIInt       rank;
2704   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2705   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2706   */
2707   if (scall == MAT_INITIAL_MATRIX){
2708     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2709     if (n == PETSC_DECIDE){
2710       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2711       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2712       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2713       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2714       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2715     }
2716 
2717     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2718     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2719     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2720     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2721     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2722 
2723     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2724     for (i=0;i<m;i++) {
2725       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2726       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2727       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2728     }
2729     /* This routine will ONLY return MPIAIJ type matrix */
2730     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2731     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2732     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2733     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2734     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2735 
2736   } else if (scall == MAT_REUSE_MATRIX){
2737     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2738   } else {
2739     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2740   }
2741 
2742   for (i=0;i<m;i++) {
2743     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2744     I    = i + rstart;
2745     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2746     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2747   }
2748   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2749   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751 
2752   PetscFunctionReturn(0);
2753 }
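/*
   Example (illustrative sketch, not part of the original source): stacking a sequential
   AIJ matrix from each process into one parallel MPIAIJ matrix with the routine above.
   Each process builds its own rows as a SeqAIJ matrix with the full global column
   count; note that MatMerge() destroys the input matrix, as the code above shows. The
   local sizes are assumptions for illustration.

.vb
     Mat      Aseq,Ampi;
     PetscInt mlocal = 4,Nglobal = 20;
     ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,mlocal,Nglobal,5,PETSC_NULL,&Aseq);CHKERRQ(ierr);
     ...   fill and assemble Aseq   ...
     ierr = MatMerge(PETSC_COMM_WORLD,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);CHKERRQ(ierr);
.ve
*/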
2754 
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatFileSplit"
2757 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2758 {
2759   PetscErrorCode    ierr;
2760   PetscMPIInt       rank;
2761   PetscInt          m,N,i,rstart,nnz;
2762   size_t            len;
2763   const PetscInt    *indx;
2764   PetscViewer       out;
2765   char              *name;
2766   Mat               B;
2767   const PetscScalar *values;
2768 
2769   PetscFunctionBegin;
2770   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2771   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2772   /* Should this be the type of the diagonal block of A? */
2773   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2774   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2775   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2776   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2777   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2778   for (i=0;i<m;i++) {
2779     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2780     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2781     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2782   }
2783   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2784   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2785 
2786   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2787   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2788   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2789   sprintf(name,"%s.%d",outfile,rank);
2790   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2791   ierr = PetscFree(name);CHKERRQ(ierr);
2792   ierr = MatView(B,out);CHKERRQ(ierr);
2793   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2794   ierr = MatDestroy(B);CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2801 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2802 {
2803   PetscErrorCode       ierr;
2804   Mat_Merge_SeqsToMPI  *merge;
2805   PetscObjectContainer container;
2806 
2807   PetscFunctionBegin;
2808   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2809   if (container) {
2810     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2811     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2812     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2813     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2814     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2815     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2816     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2817     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2818     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2819     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2820     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2821     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2822 
2823     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2824     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2825     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when it was retrieved from the container above */
2826   }
2827 
2828   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
2832 #include "src/mat/utils/freespace.h"
2833 #include "petscbt.h"
2834 static PetscEvent logkey_seqstompinum = 0;
2835 #undef __FUNCT__
2836 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2837 /*@C
2838       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2839                  matrices from each processor
2840 
2841     Collective on MPI_Comm
2842 
2843    Input Parameters:
2844 +    comm - the communicator the parallel matrix will live on
2845 .    seqmat - the input sequential matrix (one per process)
2846 .    m - number of local rows (or PETSC_DECIDE)
2847 .    n - number of local columns (or PETSC_DECIDE)
2848 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2849 
2850    Output Parameter:
2851 .    mpimat - the parallel matrix generated
2852 
2853     Level: advanced
2854 
2855    Notes:
2856      The dimensions of the sequential matrix MUST be the same on every processor.
2857      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2858      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2859 @*/
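/*
   A minimal usage sketch (the names localmat and mpimat are hypothetical): assuming each
   process has assembled a SeqAIJ matrix localmat of identical global dimensions, the
   user-level entry point MatMerge_SeqsToMPI() defined further below can be called as

       Mat mpimat;
       ierr = MatMerge_SeqsToMPI(PETSC_COMM_WORLD,localmat,PETSC_DECIDE,PETSC_DECIDE,
                                 MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(mpimat);CHKERRQ(ierr);

   Internally this splits into MatMerge_SeqsToMPISymbolic() and MatMerge_SeqsToMPINumeric(),
   both defined in this file.
*/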
2860 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2861 {
2862   PetscErrorCode       ierr;
2863   MPI_Comm             comm=mpimat->comm;
2864   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2865   PetscMPIInt          size,rank,taga,*len_s;
2866   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2867   PetscInt             proc,m;
2868   PetscInt             **buf_ri,**buf_rj;
2869   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2870   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2871   MPI_Request          *s_waits,*r_waits;
2872   MPI_Status           *status;
2873   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2874   Mat_Merge_SeqsToMPI  *merge;
2875   PetscObjectContainer container;
2876 
2877   PetscFunctionBegin;
2878   if (!logkey_seqstompinum) {
2879     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);
2880   }
2881   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2882 
2883   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2884   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2885 
2886   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2887   if (container) {
2888     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2889   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached Mat_Merge_SeqsToMPI container");
2890   bi     = merge->bi;
2891   bj     = merge->bj;
2892   buf_ri = merge->buf_ri;
2893   buf_rj = merge->buf_rj;
2894 
2895   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2896   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2897   len_s  = merge->len_s;
2898 
2899   /* send and recv matrix values */
2900   /*-----------------------------*/
2901   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2902   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2903 
2904   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2905   for (proc=0,k=0; proc<size; proc++){
2906     if (!len_s[proc]) continue;
2907     i = owners[proc];
2908     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2909     k++;
2910   }
2911 
2912   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2913   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2914   ierr = PetscFree(status);CHKERRQ(ierr);
2915 
2916   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2917   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2918 
2919   /* insert mat values of mpimat */
2920   /*----------------------------*/
2921   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2922   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2923   nextrow = buf_ri_k + merge->nrecv;
2924   nextai  = nextrow + merge->nrecv;
2925 
2926   for (k=0; k<merge->nrecv; k++){
2927     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
2928     nrows = *(buf_ri_k[k]);
2929     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
2930     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
2931   }
2932 
2933   /* set values of ba */
2934   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2935   for (i=0; i<m; i++) {
2936     arow = owners[rank] + i;
2937     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2938     bnzi = bi[i+1] - bi[i];
2939     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2940 
2941     /* add local non-zero vals of this proc's seqmat into ba */
2942     anzi = ai[arow+1] - ai[arow];
2943     aj   = a->j + ai[arow];
2944     aa   = a->a + ai[arow];
2945     nextaj = 0;
2946     for (j=0; nextaj<anzi; j++){
2947       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2948         ba_i[j] += aa[nextaj++];
2949       }
2950     }
2951 
2952     /* add received vals into ba */
2953     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2954       /* i-th row */
2955       if (i == *nextrow[k]) {
2956         anzi = *(nextai[k]+1) - *nextai[k];
2957         aj   = buf_rj[k] + *(nextai[k]);
2958         aa   = abuf_r[k] + *(nextai[k]);
2959         nextaj = 0;
2960         for (j=0; nextaj<anzi; j++){
2961           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2962             ba_i[j] += aa[nextaj++];
2963           }
2964         }
2965         nextrow[k]++; nextai[k]++;
2966       }
2967     }
2968     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2969   }
2970   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972 
2973   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2974   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2975   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2976   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2977   PetscFunctionReturn(0);
2978 }
2979 
2980 static PetscEvent logkey_seqstompisym = 0;
2981 #undef __FUNCT__
2982 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2983 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2984 {
2985   PetscErrorCode       ierr;
2986   Mat                  B_mpi;
2987   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2988   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2989   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2990   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2991   PetscInt             len,proc,*dnz,*onz;
2992   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2993   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2994   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
2995   MPI_Status           *status;
2996   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
2997   PetscBT              lnkbt;
2998   Mat_Merge_SeqsToMPI  *merge;
2999   PetscObjectContainer container;
3000 
3001   PetscFunctionBegin;
3002   if (!logkey_seqstompisym) {
3003     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);
3004   }
3005   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3006 
3007   /* make sure it is a PETSc comm */
3008   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3009   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3010   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3011 
3012   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3013   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3014 
3015   /* determine row ownership */
3016   /*---------------------------------------------------------*/
3017   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3018   if (m == PETSC_DECIDE) {
3019     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3020   } else {
3021     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3022   }
3023   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3024   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3025   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3026 
3027   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3028   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3029 
3030   /* determine the number of messages to send, their lengths */
3031   /*---------------------------------------------------------*/
3032   len_s  = merge->len_s;
3033 
3034   len = 0;  /* length of buf_si[] */
3035   merge->nsend = 0;
3036   for (proc=0; proc<size; proc++){
3037     len_si[proc] = 0;
3038     if (proc == rank){
3039       len_s[proc] = 0;
3040     } else {
3041       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3042       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3043     }
3044     if (len_s[proc]) {
3045       merge->nsend++;
3046       nrows = 0;
3047       for (i=owners[proc]; i<owners[proc+1]; i++){
3048         if (ai[i+1] > ai[i]) nrows++;
3049       }
3050       len_si[proc] = 2*(nrows+1);
3051       len += len_si[proc];
3052     }
3053   }
3054 
3055   /* determine the number and length of messages to receive for ij-structure */
3056   /*-------------------------------------------------------------------------*/
3057   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3058   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3059 
3060   /* post the Irecv of j-structure */
3061   /*-------------------------------*/
3062   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3063   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3064 
3065   /* post the Isend of j-structure */
3066   /*--------------------------------*/
3067   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3068   sj_waits = si_waits + merge->nsend;
3069 
3070   for (proc=0, k=0; proc<size; proc++){
3071     if (!len_s[proc]) continue;
3072     i = owners[proc];
3073     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3074     k++;
3075   }
3076 
3077   /* receives and sends of j-structure are complete */
3078   /*------------------------------------------------*/
3079   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3080   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3081 
3082   /* send and recv i-structure */
3083   /*---------------------------*/
3084   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3085   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3086 
3087   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3088   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3089   for (proc=0,k=0; proc<size; proc++){
3090     if (!len_s[proc]) continue;
3091     /* form outgoing message for i-structure:
3092          buf_si[0]:                 nrows to be sent
3093                [1:nrows]:           row index (global)
3094                [nrows+1:2*nrows+1]: i-structure index
3095     */
3096     /*-------------------------------------------*/
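    /* A concrete example of this layout (the numbers are hypothetical): if the two nonempty
       rows destined for [proc] have local indices 3 and 7 with 2 and 5 nonzeros respectively,
       then nrows = 2 and the outgoing i-structure message is
           buf_si = {2, 3, 7, 0, 2, 7}
       i.e. nrows, the local row indices, then the running offsets buf_si_i = {0, 2, 7}. */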
3097     nrows = len_si[proc]/2 - 1;
3098     buf_si_i    = buf_si + nrows+1;
3099     buf_si[0]   = nrows;
3100     buf_si_i[0] = 0;
3101     nrows = 0;
3102     for (i=owners[proc]; i<owners[proc+1]; i++){
3103       anzi = ai[i+1] - ai[i];
3104       if (anzi) {
3105         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3106         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3107         nrows++;
3108       }
3109     }
3110     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3111     k++;
3112     buf_si += len_si[proc];
3113   }
3114 
3115   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3116   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3117 
3118   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3119   for (i=0; i<merge->nrecv; i++){
3120     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3121   }
3122 
3123   ierr = PetscFree(len_si);CHKERRQ(ierr);
3124   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3125   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3126   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3127   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3128   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3129   ierr = PetscFree(status);CHKERRQ(ierr);
3130 
3131   /* compute a local seq matrix in each processor */
3132   /*----------------------------------------------*/
3133   /* allocate bi array and free space for accumulating nonzero column info */
3134   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3135   bi[0] = 0;
3136 
3137   /* create and initialize a linked list */
3138   nlnk = N+1;
3139   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3140 
3141   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3142   len = 0;
3143   len  = ai[owners[rank+1]] - ai[owners[rank]];
3144   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3145   current_space = free_space;
3146 
3147   /* determine symbolic info for each local row */
3148   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3149   nextrow = buf_ri_k + merge->nrecv;
3150   nextai  = nextrow + merge->nrecv;
3151   for (k=0; k<merge->nrecv; k++){
3152     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
3153     nrows = *buf_ri_k[k];
3154     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
3155     nextai[k]   = buf_ri_k[k] + (nrows + 1);/* points to the i-structure of the k-th received message */
3156   }
3157 
3158   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3159   len = 0;
3160   for (i=0;i<m;i++) {
3161     bnzi   = 0;
3162     /* add local non-zero cols of this proc's seqmat into lnk */
3163     arow   = owners[rank] + i;
3164     anzi   = ai[arow+1] - ai[arow];
3165     aj     = a->j + ai[arow];
3166     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3167     bnzi += nlnk;
3168     /* add received col data into lnk */
3169     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3170       if (i == *nextrow[k]) { /* i-th row */
3171         anzi = *(nextai[k]+1) - *nextai[k];
3172         aj   = buf_rj[k] + *nextai[k];
3173         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3174         bnzi += nlnk;
3175         nextrow[k]++; nextai[k]++;
3176       }
3177     }
3178     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3179 
3180     /* if free space is not available, make more free space */
3181     if (current_space->local_remaining<bnzi) {
3182       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3183       nspacedouble++;
3184     }
3185     /* copy data into free space, then initialize lnk */
3186     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3187     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3188 
3189     current_space->array           += bnzi;
3190     current_space->local_used      += bnzi;
3191     current_space->local_remaining -= bnzi;
3192 
3193     bi[i+1] = bi[i] + bnzi;
3194   }
3195 
3196   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3197 
3198   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3199   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3200   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3201 
3202   /* create symbolic parallel matrix B_mpi */
3203   /*---------------------------------------*/
3204   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3205   if (n==PETSC_DECIDE) {
3206     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3207   } else {
3208     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3209   }
3210   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3211   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3212   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3213 
3214   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3215   B_mpi->assembled     = PETSC_FALSE;
3216   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3217   merge->bi            = bi;
3218   merge->bj            = bj;
3219   merge->buf_ri        = buf_ri;
3220   merge->buf_rj        = buf_rj;
3221   merge->coi           = PETSC_NULL;
3222   merge->coj           = PETSC_NULL;
3223   merge->owners_co     = PETSC_NULL;
3224 
3225   /* attach the supporting struct to B_mpi for reuse */
3226   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3227   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3228   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3229   *mpimat = B_mpi;
3230 
3231   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3232   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3233   PetscFunctionReturn(0);
3234 }
3235 
3236 static PetscEvent logkey_seqstompi = 0;
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatMerge_SeqsToMPI"
3239 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3240 {
3241   PetscErrorCode   ierr;
3242 
3243   PetscFunctionBegin;
3244   if (!logkey_seqstompi) {
3245     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);
3246   }
3247   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3248   if (scall == MAT_INITIAL_MATRIX){
3249     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3250   }
3251   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3252   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3253   PetscFunctionReturn(0);
3254 }
3255 static PetscEvent logkey_getlocalmat = 0;
3256 #undef __FUNCT__
3257 #define __FUNCT__ "MatGetLocalMat"
3258 /*@C
3259      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of A
3260 
3261     Not Collective
3262 
3263    Input Parameters:
3264 +    A - the matrix
3265 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3266 
3267    Output Parameter:
3268 .    A_loc - the local sequential matrix generated
3269 
3270     Level: developer
3271 
3272 @*/
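/*
   A minimal usage sketch (the names A and Aloc are hypothetical):

       Mat Aloc;
       ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
       ...  update the values of A without changing its nonzero pattern  ...
       ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
       ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/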
3273 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3274 {
3275   PetscErrorCode  ierr;
3276   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3277   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3278   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3279   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3280   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3281   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3282 
3283   PetscFunctionBegin;
3284   if (!logkey_getlocalmat) {
3285     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);
3286   }
3287   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3288   if (scall == MAT_INITIAL_MATRIX){
3289     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3290     ci[0] = 0;
3291     for (i=0; i<am; i++){
3292       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3293     }
3294     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3295     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3296     k = 0;
3297     for (i=0; i<am; i++) {
3298       ncols_o = bi[i+1] - bi[i];
3299       ncols_d = ai[i+1] - ai[i];
3300       /* off-diagonal portion of A */
3301       for (jo=0; jo<ncols_o; jo++) {
3302         col = cmap[*bj];
3303         if (col >= cstart) break;
3304         cj[k]   = col; bj++;
3305         ca[k++] = *ba++;
3306       }
3307       /* diagonal portion of A */
3308       for (j=0; j<ncols_d; j++) {
3309         cj[k]   = cstart + *aj++;
3310         ca[k++] = *aa++;
3311       }
3312       /* off-diagonal portion of A */
3313       for (j=jo; j<ncols_o; j++) {
3314         cj[k]   = cmap[*bj++];
3315         ca[k++] = *ba++;
3316       }
3317     }
3318     /* put together the new matrix */
3319     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3320     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3321     /* Since these are PETSc arrays, change flags to free them as necessary. */
3322     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3323     mat->freedata = PETSC_TRUE;
3324     mat->nonew    = 0;
3325   } else if (scall == MAT_REUSE_MATRIX){
3326     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3327     ci = mat->i; cj = mat->j; ca = mat->a;
3328     for (i=0; i<am; i++) {
3329       /* off-diagonal portion of A */
3330       ncols_o = bi[i+1] - bi[i];
3331       for (jo=0; jo<ncols_o; jo++) {
3332         col = cmap[*bj];
3333         if (col >= cstart) break;
3334         *ca++ = *ba++; bj++;
3335       }
3336       /* diagonal portion of A */
3337       ncols_d = ai[i+1] - ai[i];
3338       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3339       /* off-diagonal portion of A */
3340       for (j=jo; j<ncols_o; j++) {
3341         *ca++ = *ba++; bj++;
3342       }
3343     }
3344   } else {
3345     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3346   }
3347 
3348   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 static PetscEvent logkey_getlocalmatcondensed = 0;
3353 #undef __FUNCT__
3354 #define __FUNCT__ "MatGetLocalMatCondensed"
3355 /*@C
3356      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and its NON-ZERO columns
3357 
3358     Not Collective
3359 
3360    Input Parameters:
3361 +    A - the matrix
3362 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3363 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3364 
3365    Output Parameter:
3366 .    A_loc - the local sequential matrix generated
3367 
3368     Level: developer
3369 
3370 @*/
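/*
   A minimal usage sketch, assuming the default row and column selection (PETSC_NULL for
   both index sets); Aloc is a hypothetical name:

       Mat Aloc;
       ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Aloc);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(Aloc);CHKERRQ(ierr);
*/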
3371 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3372 {
3373   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3374   PetscErrorCode    ierr;
3375   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3376   IS                isrowa,iscola;
3377   Mat               *aloc;
3378 
3379   PetscFunctionBegin;
3380   if (!logkey_getlocalmatcondensed) {
3381     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);
3382   }
3383   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3384   if (!row){
3385     start = a->rstart; end = a->rend;
3386     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3387   } else {
3388     isrowa = *row;
3389   }
3390   if (!col){
3391     start = a->cstart;
3392     cmap  = a->garray;
3393     nzA   = a->A->n;
3394     nzB   = a->B->n;
3395     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3396     ncols = 0;
3397     for (i=0; i<nzB; i++) {
3398       if (cmap[i] < start) idx[ncols++] = cmap[i];
3399       else break;
3400     }
3401     imark = i;
3402     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3403     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3404     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3405     ierr = PetscFree(idx);CHKERRQ(ierr);
3406   } else {
3407     iscola = *col;
3408   }
3409   if (scall != MAT_INITIAL_MATRIX){
3410     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3411     aloc[0] = *A_loc;
3412   }
3413   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3414   *A_loc = aloc[0];
3415   ierr = PetscFree(aloc);CHKERRQ(ierr);
3416   if (!row){
3417     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3418   }
3419   if (!col){
3420     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3421   }
3422   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3423   PetscFunctionReturn(0);
3424 }
3425 
3426 static PetscEvent logkey_GetBrowsOfAcols = 0;
3427 #undef __FUNCT__
3428 #define __FUNCT__ "MatGetBrowsOfAcols"
3429 /*@C
3430     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
3431 
3432     Collective on Mat
3433 
3434    Input Parameters:
3435 +    A,B - the matrices in mpiaij format
3436 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3437 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3438 
3439    Output Parameters:
3440 +    rowb, colb - index sets of rows and columns of B to extract
3441 .    brstart - row index in B_seq at which the rows taken from B's own local rows begin
3442 -    B_seq - the sequential matrix generated
3443 
3444     Level: developer
3445 
3446 @*/
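/*
   A minimal usage sketch (Bseq, rowb, colb and brstart are hypothetical names); the index
   sets produced by the first call are passed back in for the MAT_REUSE_MATRIX case:

       Mat      Bseq;
       IS       rowb,colb;
       PetscInt brstart;
       ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
       ...
       ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&Bseq);CHKERRQ(ierr);
*/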
3447 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3448 {
3449   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3450   PetscErrorCode    ierr;
3451   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3452   IS                isrowb,iscolb;
3453   Mat               *bseq;
3454 
3455   PetscFunctionBegin;
3456   if (a->cstart != b->rstart || a->cend != b->rend){
3457     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3458   }
3459   if (!logkey_GetBrowsOfAcols) {
3460     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);
3461   }
3462   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3463 
3464   if (scall == MAT_INITIAL_MATRIX){
3465     start = a->cstart;
3466     cmap  = a->garray;
3467     nzA   = a->A->n;
3468     nzB   = a->B->n;
3469     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3470     ncols = 0;
3471     for (i=0; i<nzB; i++) {  /* row < local row index */
3472       if (cmap[i] < start) idx[ncols++] = cmap[i];
3473       else break;
3474     }
3475     imark = i;
3476     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3477     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3478     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3479     ierr = PetscFree(idx);CHKERRQ(ierr);
3480     *brstart = imark;
3481     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3482   } else {
3483     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3484     isrowb = *rowb; iscolb = *colb;
3485     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3486     bseq[0] = *B_seq;
3487   }
3488   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3489   *B_seq = bseq[0];
3490   ierr = PetscFree(bseq);CHKERRQ(ierr);
3491   if (!rowb){
3492     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3493   } else {
3494     *rowb = isrowb;
3495   }
3496   if (!colb){
3497     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3498   } else {
3499     *colb = iscolb;
3500   }
3501   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3502   PetscFunctionReturn(0);
3503 }
3504 
3505 static PetscEvent logkey_GetBrowsOfAocols = 0;
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatGetBrowsOfAoCols"
3508 /*@C
3509     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3510     of the OFF-DIAGONAL portion of local A
3511 
3512     Collective on Mat
3513 
3514    Input Parameters:
3515 +    A,B - the matrices in mpiaij format
3516 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3517 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3518 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3519 
3520    Output Parameter:
3521 .    B_oth - the sequential matrix generated
3522 
3523     Level: developer
3524 
3525 @*/
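/*
   A minimal usage sketch (Both is a hypothetical name); passing PETSC_NULL for startsj and
   bufa_ptr forces the MAT_INITIAL_MATRIX path and the communication buffers are freed
   internally:

       Mat Both;
       ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&Both);CHKERRQ(ierr);
       ...
       ierr = MatDestroy(Both);CHKERRQ(ierr);
*/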
3526 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3527 {
3528   VecScatter_MPI_General *gen_to,*gen_from;
3529   PetscErrorCode         ierr;
3530   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3531   Mat_SeqAIJ             *b_oth;
3532   VecScatter             ctx=a->Mvctx;
3533   MPI_Comm               comm=ctx->comm;
3534   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3535   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3536   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3537   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3538   MPI_Request            *rwaits,*swaits;
3539   MPI_Status             *sstatus,rstatus;
3540   PetscInt               *cols;
3541   PetscScalar            *vals;
3542   PetscMPIInt            j;
3543 
3544   PetscFunctionBegin;
3545   if (a->cstart != b->rstart || a->cend != b->rend){
3546     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3547   }
3548   if (!logkey_GetBrowsOfAocols) {
3549     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);
3550   }
3551   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3552   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3553 
3554   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3555   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3556   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3557   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3558   nrecvs   = gen_from->n;
3559   nsends   = gen_to->n;
3560   rwaits   = gen_from->requests;
3561   swaits   = gen_to->requests;
3562   srow     = gen_to->indices;   /* local row index to be sent */
3563   rstarts  = gen_from->starts;
3564   sstarts  = gen_to->starts;
3565   rprocs   = gen_from->procs;
3566   sprocs   = gen_to->procs;
3567   sstatus  = gen_to->sstatus;
3568 
3569   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3570   if (scall == MAT_INITIAL_MATRIX){
3571     /* i-array */
3572     /*---------*/
3573     /*  post receives */
3574     for (i=0; i<nrecvs; i++){
3575       rowlen = (PetscInt*)rvalues + rstarts[i];
3576       nrows = rstarts[i+1]-rstarts[i];
3577       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3578     }
3579 
3580     /* pack the outgoing message */
3581     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3582     rstartsj = sstartsj + nsends +1;
3583     sstartsj[0] = 0;  rstartsj[0] = 0;
3584     len = 0; /* total length of j or a array to be sent */
3585     k = 0;
3586     for (i=0; i<nsends; i++){
3587       rowlen = (PetscInt*)svalues + sstarts[i];
3588       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3589       for (j=0; j<nrows; j++) {
3590         row = srow[k] + b->rowners[rank]; /* global row idx */
3591         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3592         len += rowlen[j];
3593         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3594         k++;
3595       }
3596       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3597       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3598     }
3599     /* recvs and sends of i-array are completed */
3600     i = nrecvs;
3601     while (i--) {
3602       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3603     }
3604     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3605     /* allocate buffers for sending j and a arrays */
3606     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3607     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3608 
3609     /* create i-array of B_oth */
3610     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3611     b_othi[0] = 0;
3612     len = 0; /* total length of j or a array to be received */
3613     k = 0;
3614     for (i=0; i<nrecvs; i++){
3615       rowlen = (PetscInt*)rvalues + rstarts[i];
3616       nrows = rstarts[i+1]-rstarts[i];
3617       for (j=0; j<nrows; j++) {
3618         b_othi[k+1] = b_othi[k] + rowlen[j];
3619         len += rowlen[j]; k++;
3620       }
3621       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3622     }
3623 
3624     /* allocate space for the j and a arrays of B_oth */
3625     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3626     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3627 
3628     /* j-array */
3629     /*---------*/
3630     /*  post receives of j-array */
3631     for (i=0; i<nrecvs; i++){
3632       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3633       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3634     }
3635     k = 0;
3636     for (i=0; i<nsends; i++){
3637       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3638       bufJ = bufj+sstartsj[i];
3639       for (j=0; j<nrows; j++) {
3640         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3641         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3642         for (l=0; l<ncols; l++){
3643           *bufJ++ = cols[l];
3644         }
3645         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3646       }
3647       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3648     }
3649 
3650     /* recvs and sends of j-array are completed */
3651     i = nrecvs;
3652     while (i--) {
3653       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3654     }
3655     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3656   } else if (scall == MAT_REUSE_MATRIX){
3657     sstartsj = *startsj;
3658     rstartsj = sstartsj + nsends +1;
3659     bufa     = *bufa_ptr;
3660     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3661     b_otha   = b_oth->a;
3662   } else {
3663     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3664   }
3665 
3666   /* a-array */
3667   /*---------*/
3668   /*  post receives of a-array */
3669   for (i=0; i<nrecvs; i++){
3670     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3671     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3672   }
3673   k = 0;
3674   for (i=0; i<nsends; i++){
3675     nrows = sstarts[i+1]-sstarts[i];
3676     bufA = bufa+sstartsj[i];
3677     for (j=0; j<nrows; j++) {
3678       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3679       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3680       for (l=0; l<ncols; l++){
3681         *bufA++ = vals[l];
3682       }
3683       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3684 
3685     }
3686     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3687   }
3688   /* recvs and sends of a-array are completed */
3689   i = nrecvs;
3690   while (i--) {
3691     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3692   }
3693   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3694 
3695   if (scall == MAT_INITIAL_MATRIX){
3696     /* put together the new matrix */
3697     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3698 
3699     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3700     /* Since these are PETSc arrays, change flags to free them as necessary. */
3701     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3702     b_oth->freedata = PETSC_TRUE;
3703     b_oth->nonew    = 0;
3704 
3705     ierr = PetscFree(bufj);CHKERRQ(ierr);
3706     if (!startsj || !bufa_ptr){
3707       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3708       ierr = PetscFree(bufa);CHKERRQ(ierr); /* the value buffer is not handed back to the caller */
3709     } else {
3710       *startsj  = sstartsj;
3711       *bufa_ptr = bufa;
3712     }
3713   }
3714   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3715 
3716   PetscFunctionReturn(0);
3717 }
3718 
3719 /*MC
3720    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3721 
3722    Options Database Keys:
3723 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3724 
3725   Level: beginner
3726 
3727 .seealso: MatCreateMPIAIJ
3728 M*/
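/*
   A minimal sketch of creating a matrix of this type explicitly (the sizes and preallocation
   values here are placeholders):

       Mat A;
       ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
       ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
       ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Alternatively, call MatSetFromOptions(A) instead of MatSetType() and select the type at
   run time with -mat_type mpiaij.
*/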
3729 
3730 EXTERN_C_BEGIN
3731 #undef __FUNCT__
3732 #define __FUNCT__ "MatCreate_MPIAIJ"
3733 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3734 {
3735   Mat_MPIAIJ     *b;
3736   PetscErrorCode ierr;
3737   PetscInt       i;
3738   PetscMPIInt    size;
3739 
3740   PetscFunctionBegin;
3741   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3742 
3743   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3744   B->data         = (void*)b;
3745   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3746   B->factor       = 0;
3747   B->bs           = 1;
3748   B->assembled    = PETSC_FALSE;
3749   B->mapping      = 0;
3750 
3751   B->insertmode      = NOT_SET_VALUES;
3752   b->size            = size;
3753   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3754 
3755   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3756   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3757 
3758   /* the information in the maps duplicates the information computed below, eventually
3759      we should remove the duplicate information that is not contained in the maps */
3760   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3761   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3762 
3763   /* build local table of row and column ownerships */
3764   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3765   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3766   b->cowners = b->rowners + b->size + 2;
3767   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3768   b->rowners[0] = 0;
3769   for (i=2; i<=b->size; i++) {
3770     b->rowners[i] += b->rowners[i-1];
3771   }
3772   b->rstart = b->rowners[b->rank];
3773   b->rend   = b->rowners[b->rank+1];
3774   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3775   b->cowners[0] = 0;
3776   for (i=2; i<=b->size; i++) {
3777     b->cowners[i] += b->cowners[i-1];
3778   }
3779   b->cstart = b->cowners[b->rank];
3780   b->cend   = b->cowners[b->rank+1];
3781 
3782   /* build cache for off array entries formed */
3783   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3784   b->donotstash  = PETSC_FALSE;
3785   b->colmap      = 0;
3786   b->garray      = 0;
3787   b->roworiented = PETSC_TRUE;
3788 
3789   /* stuff used for matrix vector multiply */
3790   b->lvec      = PETSC_NULL;
3791   b->Mvctx     = PETSC_NULL;
3792 
3793   /* stuff for MatGetRow() */
3794   b->rowindices   = 0;
3795   b->rowvalues    = 0;
3796   b->getrowactive = PETSC_FALSE;
3797 
3798   /* Explicitly create 2 MATSEQAIJ matrices. */
3799   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3800   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3801   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3802   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3803   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3804   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3805   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3806   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3807 
3808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3809                                      "MatStoreValues_MPIAIJ",
3810                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3812                                      "MatRetrieveValues_MPIAIJ",
3813                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3815 				     "MatGetDiagonalBlock_MPIAIJ",
3816                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3818 				     "MatIsTranspose_MPIAIJ",
3819 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3821 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3822 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3824 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3825 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3827 				     "MatDiagonalScaleLocal_MPIAIJ",
3828 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3829   PetscFunctionReturn(0);
3830 }
3831 EXTERN_C_END
3832