xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f251bdbd1d9e8529587f82c093daf4686cd196a5)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable,
10 at a slightly higher hash-table lookup cost; without it, it is not scalable
11 (each processor has an order N integer array, but access is fast).
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
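/*
   Illustrative sketch (not part of the build): how the colmap created above is
   consulted later (e.g. in MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()) to
   translate a global column number into a local column of the off-diagonal
   block B.  Both storage variants shift by one so that a lookup result of zero
   can mean "not a ghost column of this process".
*/
#if 0
static PetscErrorCode ExampleGlobalToLocalColumn_Sketch(Mat_MPIAIJ *aij,PetscInt gcol,PetscInt *lcol)
{
  PetscErrorCode ierr;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,lcol);CHKERRQ(ierr);
  (*lcol)--;                      /* the table returns 0 for a missing key, hence -1 here */
#else
  *lcol = aij->colmap[gcol] - 1;  /* colmap[] stores local index + 1; 0 means absent */
#endif
  /* when *lcol >= 0 it normally satisfies aij->garray[*lcol] == gcol (callers double check) */
  return 0;
}
#endif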
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
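/*
   Illustrative sketch (not part of the build): the insertion logic that the two
   macros above expand to, written out as a plain function for a single row
   stored as sorted column indices rp[0..nrow) with values ap[0..nrow).
   Reallocation, the "nonew" options and zero-entry filtering are omitted here.
*/
#if 0
static void ExampleInsertIntoSortedRow_Sketch(PetscInt *rp,PetscScalar *ap,PetscInt *nrow,PetscInt col,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i,ii;

  while (high-low > 5) {            /* binary search narrows the window ... */
    t = (low+high)/2;
    if (rp[t] > col) high = t; else low = t;
  }
  for (i=low; i<high; i++) {        /* ... then a short linear scan finishes */
    if (rp[i] > col) break;
    if (rp[i] == col) {
      if (addv == ADD_VALUES) ap[i] += value; else ap[i] = value;
      return;
    }
  }
  for (ii=*nrow-1; ii>=i; ii--) {   /* shift later entries up to open a slot */
    rp[ii+1] = rp[ii];
    ap[ii+1] = ap[ii];
  }
  rp[i] = col; ap[i] = value; (*nrow)++;
}
#endif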
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the MatSetValues_SeqAIJ_{A,B}_Private() macros above */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171 	    col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
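/*
   Illustrative sketch (not part of the build): the routing decision
   MatSetValues_MPIAIJ() above makes for each (row,col) pair.  Rows outside
   [rstart,rend) are stashed for communication at assembly time; owned rows go
   to the diagonal block A when cstart <= col < cend, and otherwise to the
   off-diagonal block B (after translating col through colmap/garray).
*/
#if 0
static const char *ExampleClassifyEntry_Sketch(PetscInt row,PetscInt col,PetscInt rstart,PetscInt rend,PetscInt cstart,PetscInt cend)
{
  if (row < rstart || row >= rend) return "stash for another process";
  if (col >= cstart && col < cend) return "diagonal block A";
  return "off-diagonal block B";
}
#endif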
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col --;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are either in INSERTMODE or ADDMODE */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
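/*
   Illustrative sketch (not part of the build): why the bitwise-OR reduction in
   MatAssemblyBegin_MPIAIJ() detects mixed insert modes.  INSERT_VALUES and
   ADD_VALUES have different bit patterns, so if some ranks insert and some
   add, the OR over all ranks equals ADD_VALUES|INSERT_VALUES, which is
   neither valid mode, and the routine errors out.
*/
#if 0
static PetscErrorCode ExampleCheckConsistentInsertMode_Sketch(MPI_Comm comm,InsertMode mymode)
{
  PetscErrorCode ierr;
  InsertMode     allmodes;

  ierr = MPI_Allreduce(&mymode,&allmodes,1,MPI_INT,MPI_BOR,comm);CHKERRQ(ierr);
  if (allmodes == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  return 0;
}
#endif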
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so, we must
318      also disassemble ourselves, in order that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change then we know that
321      no processor disassembled, thus we can skip this stuff
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed in MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380 #if defined(PETSC_DEBUG)
381   PetscTruth     found = PETSC_FALSE;
382 #endif
383 
384   PetscFunctionBegin;
385   /*  first count number of contributors to each processor */
386   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
387   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
388   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
389   j = 0;
390   for (i=0; i<N; i++) {
391     if (lastidx > (idx = rows[i])) j = 0;
392     lastidx = idx;
393     for (; j<size; j++) {
394       if (idx >= owners[j] && idx < owners[j+1]) {
395         nprocs[2*j]++;
396         nprocs[2*j+1] = 1;
397         owner[i] = j;
398 #if defined(PETSC_DEBUG)
399         found = PETSC_TRUE;
400 #endif
401         break;
402       }
403     }
404 #if defined(PETSC_DEBUG)
405     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
406     found = PETSC_FALSE;
407 #endif
408   }
409   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
410 
411   /* inform other processors of number of messages and max length */
412   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
413 
414   /* post receives:   */
415   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
416   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
417   for (i=0; i<nrecvs; i++) {
418     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
419   }
420 
421   /* do sends:
422       1) starts[i] gives the starting index in svalues for stuff going to
423          the ith processor
424   */
425   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
426   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
427   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
428   starts[0] = 0;
429   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
430   for (i=0; i<N; i++) {
431     svalues[starts[owner[i]]++] = rows[i];
432   }
433 
434   starts[0] = 0;
435   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
436   count = 0;
437   for (i=0; i<size; i++) {
438     if (nprocs[2*i+1]) {
439       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
440     }
441   }
442   ierr = PetscFree(starts);CHKERRQ(ierr);
443 
444   base = owners[rank];
445 
446   /*  wait on receives */
447   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
448   source = lens + nrecvs;
449   count  = nrecvs; slen = 0;
450   while (count) {
451     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
452     /* unpack receives into our local space */
453     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
454     source[imdex]  = recv_status.MPI_SOURCE;
455     lens[imdex]    = n;
456     slen          += n;
457     count--;
458   }
459   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
460 
461   /* move the data into the send scatter */
462   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
463   count = 0;
464   for (i=0; i<nrecvs; i++) {
465     values = rvalues + i*nmax;
466     for (j=0; j<lens[i]; j++) {
467       lrows[count++] = values[j] - base;
468     }
469   }
470   ierr = PetscFree(rvalues);CHKERRQ(ierr);
471   ierr = PetscFree(lens);CHKERRQ(ierr);
472   ierr = PetscFree(owner);CHKERRQ(ierr);
473   ierr = PetscFree(nprocs);CHKERRQ(ierr);
474 
475   /* actually zap the local rows */
476   /*
477         Zero the required rows. If the "diagonal block" of the matrix
478      is square and the user wishes to set the diagonal we use separate
479      code so that MatSetValues() is not called for each diagonal entry,
480      which would allocate new memory with many mallocs and slow things down.
481 
482        Contributed by: Matthew Knepley
483   */
484   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
485   ierr = MatZeroRows(l->B,slen,lrows,0.0);CHKERRQ(ierr);
486   if ((diag != 0.0) && (l->A->M == l->A->N)) {
487     ierr      = MatZeroRows(l->A,slen,lrows,diag);CHKERRQ(ierr);
488   } else if (diag != 0.0) {
489     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
490     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
491       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
492 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
493     }
494     for (i = 0; i < slen; i++) {
495       row  = lrows[i] + rstart;
496       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
497     }
498     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
499     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
500   } else {
501     ierr = MatZeroRows(l->A,slen,lrows,0.0);CHKERRQ(ierr);
502   }
503   ierr = PetscFree(lrows);CHKERRQ(ierr);
504 
505   /* wait on sends */
506   if (nsends) {
507     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
508     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
509     ierr = PetscFree(send_status);CHKERRQ(ierr);
510   }
511   ierr = PetscFree(send_waits);CHKERRQ(ierr);
512   ierr = PetscFree(svalues);CHKERRQ(ierr);
513 
514   PetscFunctionReturn(0);
515 }
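/*
   Illustrative sketch (not part of the build): the ownership lookup used in
   the loop above.  owners[] (l->rowners) holds the row-range boundaries of
   every process, so process j owns the global rows [owners[j], owners[j+1]).
   The real loop additionally remembers its last position so that sorted input
   does not restart the scan from process 0 on every row.
*/
#if 0
static PetscMPIInt ExampleFindRowOwner_Sketch(PetscInt idx,PetscMPIInt size,const PetscInt *owners)
{
  PetscMPIInt j;
  for (j=0; j<size; j++) {
    if (idx >= owners[j] && idx < owners[j+1]) return j;
  }
  return -1;  /* index out of range */
}
#endif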
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatMult_MPIAIJ"
519 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
520 {
521   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
522   PetscErrorCode ierr;
523   PetscInt       nt;
524 
525   PetscFunctionBegin;
526   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
527   if (nt != A->n) {
528     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
529   }
530   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
531   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
532   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
533   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
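/*
   Illustrative sketch (not part of the build): MatMult_MPIAIJ() above computes
   yy = A_d*xx + B_o*x_ghost, where A_d is the local diagonal block, B_o the
   off-diagonal block with compacted column numbering, and x_ghost the ghost
   values gathered into a->lvec; the ghost scatter is overlapped with the local
   product.  Each of the two products is an ordinary sequential CSR multiply:
*/
#if 0
static void ExampleCSRMultAdd_Sketch(PetscInt m,const PetscInt *ai,const PetscInt *aj,const PetscScalar *aa,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,j;
  for (i=0; i<m; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) y[i] += aa[j]*x[aj[j]];  /* y += M*x, row by row */
  }
}
#endif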
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatMultAdd_MPIAIJ"
539 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
540 {
541   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
542   PetscErrorCode ierr;
543 
544   PetscFunctionBegin;
545   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
546   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
547   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
548   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
554 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
555 {
556   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
557   PetscErrorCode ierr;
558   PetscTruth     merged;
559 
560   PetscFunctionBegin;
561   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
562   /* do nondiagonal part */
563   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
564   if (!merged) {
565     /* send it on its way */
566     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
567     /* do local part */
568     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
569     /* receive remote parts: note this assumes the values are not actually */
570     /* added into yy until the next line */
571     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
572   } else {
573     /* do local part */
574     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
575     /* send it on its way */
576     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
577     /* values were actually received in the Begin() but we still need to call this no-op End() */
578     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
586 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscTruth *f)
587 {
588   MPI_Comm       comm;
589   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
590   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
591   IS             Me,Notme;
592   PetscErrorCode ierr;
593   PetscInt       M,N,first,last,*notme,i;
594   PetscMPIInt    size;
595 
596   PetscFunctionBegin;
597 
598   /* Easy test: check that the diagonal blocks are transposes of each other */
599   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
600   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
601   if (!*f) PetscFunctionReturn(0);
602   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
603   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
604   if (size == 1) PetscFunctionReturn(0);
605 
606   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
607   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
608   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
609   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
610   for (i=0; i<first; i++) notme[i] = i;
611   for (i=last; i<M; i++) notme[i-last+first] = i;
612   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
613   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
614   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
615   Aoff = Aoffs[0];
616   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
617   Boff = Boffs[0];
618   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
619   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
620   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
621   ierr = ISDestroy(Me);CHKERRQ(ierr);
622   ierr = ISDestroy(Notme);CHKERRQ(ierr);
623 
624   PetscFunctionReturn(0);
625 }
626 EXTERN_C_END
627 
628 #undef __FUNCT__
629 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
630 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
631 {
632   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
633   PetscErrorCode ierr;
634 
635   PetscFunctionBegin;
636   /* do nondiagonal part */
637   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
638   /* send it on its way */
639   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
640   /* do local part */
641   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
642   /* receive remote parts */
643   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
644   PetscFunctionReturn(0);
645 }
646 
647 /*
648   This only works correctly for square matrices where the subblock A->A is the
649    diagonal block
650 */
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
653 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
654 {
655   PetscErrorCode ierr;
656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
660   if (a->rstart != a->cstart || a->rend != a->cend) {
661     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
662   }
663   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatScale_MPIAIJ"
669 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
670 {
671   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
672   PetscErrorCode ierr;
673 
674   PetscFunctionBegin;
675   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
676   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatDestroy_MPIAIJ"
682 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
683 {
684   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
685   PetscErrorCode ierr;
686 
687   PetscFunctionBegin;
688 #if defined(PETSC_USE_LOG)
689   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
690 #endif
691   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
692   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
693   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
694   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
695 #if defined (PETSC_USE_CTABLE)
696   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
697 #else
698   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
699 #endif
700   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
701   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
702   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
703   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
704   ierr = PetscFree(aij);CHKERRQ(ierr);
705 
706   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
707   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
708   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
709   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
710   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
711   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
712   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
713   PetscFunctionReturn(0);
714 }
715 
716 #undef __FUNCT__
717 #define __FUNCT__ "MatView_MPIAIJ_Binary"
718 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
719 {
720   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
721   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
722   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
723   PetscErrorCode    ierr;
724   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
725   int               fd;
726   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
727   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
728   PetscScalar       *column_values;
729 
730   PetscFunctionBegin;
731   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
732   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
733   nz   = A->nz + B->nz;
734   if (!rank) {
735     header[0] = MAT_FILE_COOKIE;
736     header[1] = mat->M;
737     header[2] = mat->N;
738     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
739     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
740     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
741     /* get largest number of rows any processor has */
742     rlen = mat->m;
743     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
744     for (i=1; i<size; i++) {
745       rlen = PetscMax(rlen,range[i+1] - range[i]);
746     }
747   } else {
748     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
749     rlen = mat->m;
750   }
751 
752   /* load up the local row counts */
753   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
754   for (i=0; i<mat->m; i++) {
755     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
756   }
757 
758   /* store the row lengths to the file */
759   if (!rank) {
760     MPI_Status status;
761     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
762     for (i=1; i<size; i++) {
763       rlen = range[i+1] - range[i];
764       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
765       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
766     }
767   } else {
768     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
769   }
770   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
771 
772   /* load up the local column indices */
773   nzmax = nz; /* the 0th processor needs as much space as the largest processor needs */
774   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
775   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
776   cnt  = 0;
777   for (i=0; i<mat->m; i++) {
778     for (j=B->i[i]; j<B->i[i+1]; j++) {
779       if ( (col = garray[B->j[j]]) > cstart) break;
780       column_indices[cnt++] = col;
781     }
782     for (k=A->i[i]; k<A->i[i+1]; k++) {
783       column_indices[cnt++] = A->j[k] + cstart;
784     }
785     for (; j<B->i[i+1]; j++) {
786       column_indices[cnt++] = garray[B->j[j]];
787     }
788   }
789   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
790 
791   /* store the column indices to the file */
792   if (!rank) {
793     MPI_Status status;
794     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
795     for (i=1; i<size; i++) {
796       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
797       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
798       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
799       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
800     }
801   } else {
802     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
803     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(column_indices);CHKERRQ(ierr);
806 
807   /* load up the local column values */
808   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
809   cnt  = 0;
810   for (i=0; i<mat->m; i++) {
811     for (j=B->i[i]; j<B->i[i+1]; j++) {
812       if ( garray[B->j[j]] > cstart) break;
813       column_values[cnt++] = B->a[j];
814     }
815     for (k=A->i[i]; k<A->i[i+1]; k++) {
816       column_values[cnt++] = A->a[k];
817     }
818     for (; j<B->i[i+1]; j++) {
819       column_values[cnt++] = B->a[j];
820     }
821   }
822   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
823 
824   /* store the column values to the file */
825   if (!rank) {
826     MPI_Status status;
827     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
828     for (i=1; i<size; i++) {
829       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
830       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
831       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
832       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
833     }
834   } else {
835     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
836     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
837   }
838   ierr = PetscFree(column_values);CHKERRQ(ierr);
839   PetscFunctionReturn(0);
840 }
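/*
   For reference, the layout written above is, in order: a four-entry integer
   header {MAT_FILE_COOKIE, M, N, total number of nonzeros}, then one row
   length per global row, then all column indices in global numbering (each
   row stores its off-diagonal entries that precede the diagonal block first,
   then the diagonal block shifted by cstart, then the remaining off-diagonal
   entries, keeping columns sorted), and finally the values in the same order.
*/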
841 
842 #undef __FUNCT__
843 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
844 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
845 {
846   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
847   PetscErrorCode    ierr;
848   PetscMPIInt       rank = aij->rank,size = aij->size;
849   PetscTruth        isdraw,iascii,isbinary;
850   PetscViewer       sviewer;
851   PetscViewerFormat format;
852 
853   PetscFunctionBegin;
854   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
855   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
856   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
857   if (iascii) {
858     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
859     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
860       MatInfo    info;
861       PetscTruth inodes;
862 
863       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
864       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
865       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
866       if (!inodes) {
867         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
868 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
869       } else {
870         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
871 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
872       }
873       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
875       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
876       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
877       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
878       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
879       PetscFunctionReturn(0);
880     } else if (format == PETSC_VIEWER_ASCII_INFO) {
881       PetscInt   inodecount,inodelimit,*inodes;
882       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
883       if (inodes) {
884         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
885       } else {
886         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
887       }
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
890       PetscFunctionReturn(0);
891     }
892   } else if (isbinary) {
893     if (size == 1) {
894       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
895       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
896     } else {
897       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
898     }
899     PetscFunctionReturn(0);
900   } else if (isdraw) {
901     PetscDraw  draw;
902     PetscTruth isnull;
903     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
904     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
905   }
906 
907   if (size == 1) {
908     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
909     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
910   } else {
911     /* assemble the entire matrix onto the first processor. */
912     Mat         A;
913     Mat_SeqAIJ  *Aloc;
914     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
915     PetscScalar *a;
916 
917     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
918     if (!rank) {
919       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
920     } else {
921       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
922     }
923     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
924     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
925     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
926     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
927 
928     /* copy over the A part */
929     Aloc = (Mat_SeqAIJ*)aij->A->data;
930     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
931     row = aij->rstart;
932     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
933     for (i=0; i<m; i++) {
934       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
935       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
936     }
937     aj = Aloc->j;
938     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
939 
940     /* copy over the B part */
941     Aloc = (Mat_SeqAIJ*)aij->B->data;
942     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
943     row  = aij->rstart;
944     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
945     ct   = cols;
946     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
947     for (i=0; i<m; i++) {
948       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
949       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
950     }
951     ierr = PetscFree(ct);CHKERRQ(ierr);
952     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
953     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
954     /*
955        Everyone has to participate in drawing the matrix since the graphics waits are
956        synchronized across all processors that share the PetscDraw object
957     */
958     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
959     if (!rank) {
960       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
961       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
962     }
963     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
964     ierr = MatDestroy(A);CHKERRQ(ierr);
965   }
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatView_MPIAIJ"
971 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
972 {
973   PetscErrorCode ierr;
974   PetscTruth     iascii,isdraw,issocket,isbinary;
975 
976   PetscFunctionBegin;
977   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
978   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
979   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
980   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
981   if (iascii || isdraw || isbinary || issocket) {
982     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
983   } else {
984     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
985   }
986   PetscFunctionReturn(0);
987 }
988 
989 
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatRelax_MPIAIJ"
993 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
994 {
995   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
996   PetscErrorCode ierr;
997   Vec            bb1;
998   PetscScalar    mone=-1.0;
999 
1000   PetscFunctionBegin;
1001   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1002 
1003   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1004 
1005   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1006     if (flag & SOR_ZERO_INITIAL_GUESS) {
1007       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1008       its--;
1009     }
1010 
1011     while (its--) {
1012       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1013       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1014 
1015       /* update rhs: bb1 = bb - B*x */
1016       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1017       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1018 
1019       /* local sweep */
1020       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1021       CHKERRQ(ierr);
1022     }
1023   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1024     if (flag & SOR_ZERO_INITIAL_GUESS) {
1025       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1026       its--;
1027     }
1028     while (its--) {
1029       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1030       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1031 
1032       /* update rhs: bb1 = bb - B*x */
1033       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1034       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1035 
1036       /* local sweep */
1037       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1038       CHKERRQ(ierr);
1039     }
1040   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1041     if (flag & SOR_ZERO_INITIAL_GUESS) {
1042       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1043       its--;
1044     }
1045     while (its--) {
1046       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1047       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1048 
1049       /* update rhs: bb1 = bb - B*x */
1050       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1051       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1052 
1053       /* local sweep */
1054       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1055       CHKERRQ(ierr);
1056     }
1057   } else {
1058     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1059   }
1060 
1061   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
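/*
   Illustrative summary (not part of the build) of the "local" relaxation
   variants above.  Each outer iteration is a block Jacobi step across
   processes, with SOR applied to the local diagonal block against a
   right-hand side from which the current off-process contribution has been
   subtracted:

       x_ghost <- scatter(x)              (forward scatter into mat->lvec)
       bb1     <- bb - B_o * x_ghost      (VecScale by -1, then multadd)
       x       <- SOR(A_d, bb1, omega)    (lits local sweeps)
*/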
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1067 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1068 {
1069   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1070   Mat            A = mat->A,B = mat->B;
1071   PetscErrorCode ierr;
1072   PetscReal      isend[5],irecv[5];
1073 
1074   PetscFunctionBegin;
1075   info->block_size     = 1.0;
1076   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1077   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1078   isend[3] = info->memory;  isend[4] = info->mallocs;
1079   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1080   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1081   isend[3] += info->memory;  isend[4] += info->mallocs;
1082   if (flag == MAT_LOCAL) {
1083     info->nz_used      = isend[0];
1084     info->nz_allocated = isend[1];
1085     info->nz_unneeded  = isend[2];
1086     info->memory       = isend[3];
1087     info->mallocs      = isend[4];
1088   } else if (flag == MAT_GLOBAL_MAX) {
1089     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1090     info->nz_used      = irecv[0];
1091     info->nz_allocated = irecv[1];
1092     info->nz_unneeded  = irecv[2];
1093     info->memory       = irecv[3];
1094     info->mallocs      = irecv[4];
1095   } else if (flag == MAT_GLOBAL_SUM) {
1096     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1097     info->nz_used      = irecv[0];
1098     info->nz_allocated = irecv[1];
1099     info->nz_unneeded  = irecv[2];
1100     info->memory       = irecv[3];
1101     info->mallocs      = irecv[4];
1102   }
1103   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1104   info->fill_ratio_needed = 0;
1105   info->factor_mallocs    = 0;
1106   info->rows_global       = (double)matin->M;
1107   info->columns_global    = (double)matin->N;
1108   info->rows_local        = (double)matin->m;
1109   info->columns_local     = (double)matin->N;
1110 
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatSetOption_MPIAIJ"
1116 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1117 {
1118   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   switch (op) {
1123   case MAT_NO_NEW_NONZERO_LOCATIONS:
1124   case MAT_YES_NEW_NONZERO_LOCATIONS:
1125   case MAT_COLUMNS_UNSORTED:
1126   case MAT_COLUMNS_SORTED:
1127   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1128   case MAT_KEEP_ZEROED_ROWS:
1129   case MAT_NEW_NONZERO_LOCATION_ERR:
1130   case MAT_USE_INODES:
1131   case MAT_DO_NOT_USE_INODES:
1132   case MAT_IGNORE_ZERO_ENTRIES:
1133     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1134     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1135     break;
1136   case MAT_ROW_ORIENTED:
1137     a->roworiented = PETSC_TRUE;
1138     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1139     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1140     break;
1141   case MAT_ROWS_SORTED:
1142   case MAT_ROWS_UNSORTED:
1143   case MAT_YES_NEW_DIAGONALS:
1144     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1145     break;
1146   case MAT_COLUMN_ORIENTED:
1147     a->roworiented = PETSC_FALSE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_IGNORE_OFF_PROC_ENTRIES:
1152     a->donotstash = PETSC_TRUE;
1153     break;
1154   case MAT_NO_NEW_DIAGONALS:
1155     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1156   case MAT_SYMMETRIC:
1157   case MAT_STRUCTURALLY_SYMMETRIC:
1158   case MAT_HERMITIAN:
1159   case MAT_SYMMETRY_ETERNAL:
1160     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1161     break;
1162   case MAT_NOT_SYMMETRIC:
1163   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1164   case MAT_NOT_HERMITIAN:
1165   case MAT_NOT_SYMMETRY_ETERNAL:
1166     break;
1167   default:
1168     SETERRQ(PETSC_ERR_SUP,"unknown option");
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatGetRow_MPIAIJ"
1175 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1176 {
1177   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1178   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1179   PetscErrorCode ierr;
1180   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1181   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1182   PetscInt       *cmap,*idx_p;
1183 
1184   PetscFunctionBegin;
1185   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1186   mat->getrowactive = PETSC_TRUE;
1187 
1188   if (!mat->rowvalues && (idx || v)) {
1189     /*
1190         allocate enough space to hold information from the longest row.
1191     */
1192     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1193     PetscInt     max = 1,tmp;
1194     for (i=0; i<matin->m; i++) {
1195       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1196       if (max < tmp) { max = tmp; }
1197     }
1198     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1199     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1200   }
1201 
1202   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1203   lrow = row - rstart;
1204 
1205   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1206   if (!v)   {pvA = 0; pvB = 0;}
1207   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1208   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   nztot = nzA + nzB;
1211 
1212   cmap  = mat->garray;
1213   if (v  || idx) {
1214     if (nztot) {
1215       /* Sort by increasing column numbers, assuming A and B already sorted */
1216       PetscInt imark = -1;
1217       if (v) {
1218         *v = v_p = mat->rowvalues;
1219         for (i=0; i<nzB; i++) {
1220           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1221           else break;
1222         }
1223         imark = i;
1224         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1225         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1226       }
1227       if (idx) {
1228         *idx = idx_p = mat->rowindices;
1229         if (imark > -1) {
1230           for (i=0; i<imark; i++) {
1231             idx_p[i] = cmap[cworkB[i]];
1232           }
1233         } else {
1234           for (i=0; i<nzB; i++) {
1235             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1236             else break;
1237           }
1238           imark = i;
1239         }
1240         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1241         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1242       }
1243     } else {
1244       if (idx) *idx = 0;
1245       if (v)   *v   = 0;
1246     }
1247   }
1248   *nz = nztot;
1249   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1250   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1256 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1257 {
1258   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1259 
1260   PetscFunctionBegin;
1261   if (!aij->getrowactive) {
1262     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1263   }
1264   aij->getrowactive = PETSC_FALSE;
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatNorm_MPIAIJ"
1270 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1274   PetscErrorCode ierr;
1275   PetscInt       i,j,cstart = aij->cstart;
1276   PetscReal      sum = 0.0;
1277   PetscScalar    *v;
1278 
1279   PetscFunctionBegin;
1280   if (aij->size == 1) {
1281     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1282   } else {
1283     if (type == NORM_FROBENIUS) {
1284       v = amat->a;
1285       for (i=0; i<amat->nz; i++) {
1286 #if defined(PETSC_USE_COMPLEX)
1287         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1288 #else
1289         sum += (*v)*(*v); v++;
1290 #endif
1291       }
1292       v = bmat->a;
1293       for (i=0; i<bmat->nz; i++) {
1294 #if defined(PETSC_USE_COMPLEX)
1295         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1296 #else
1297         sum += (*v)*(*v); v++;
1298 #endif
1299       }
1300       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1301       *norm = sqrt(*norm);
1302     } else if (type == NORM_1) { /* max column norm */
1303       PetscReal *tmp,*tmp2;
1304       PetscInt    *jj,*garray = aij->garray;
1305       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1306       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1307       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1308       *norm = 0.0;
1309       v = amat->a; jj = amat->j;
1310       for (j=0; j<amat->nz; j++) {
1311         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1312       }
1313       v = bmat->a; jj = bmat->j;
1314       for (j=0; j<bmat->nz; j++) {
1315         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1316       }
1317       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1318       for (j=0; j<mat->N; j++) {
1319         if (tmp2[j] > *norm) *norm = tmp2[j];
1320       }
1321       ierr = PetscFree(tmp);CHKERRQ(ierr);
1322       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1323     } else if (type == NORM_INFINITY) { /* max row norm */
1324       PetscReal ntemp = 0.0;
1325       for (j=0; j<aij->A->m; j++) {
1326         v = amat->a + amat->i[j];
1327         sum = 0.0;
1328         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1329           sum += PetscAbsScalar(*v); v++;
1330         }
1331         v = bmat->a + bmat->i[j];
1332         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1333           sum += PetscAbsScalar(*v); v++;
1334         }
1335         if (sum > ntemp) ntemp = sum;
1336       }
1337       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1338     } else {
1339       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1340     }
1341   }
1342   PetscFunctionReturn(0);
1343 }
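/*
   Illustrative sketch (not part of the build): the parallel Frobenius norm
   above is the usual "local sum of squares, one Allreduce, square root"
   pattern; the 1-norm and infinity norm follow the same shape with per-column
   (Allreduce SUM, then max) and per-row (local max, then Allreduce MAX)
   accumulations respectively.
*/
#if 0
static PetscErrorCode ExampleFrobeniusNorm_Sketch(MPI_Comm comm,PetscInt nz,const PetscScalar *a,PetscReal *norm)
{
  PetscErrorCode ierr;
  PetscReal      sum = 0.0;
  PetscInt       i;

  for (i=0; i<nz; i++) sum += PetscAbsScalar(a[i])*PetscAbsScalar(a[i]);  /* local |a_ij|^2 */
  ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,comm);CHKERRQ(ierr);
  *norm = sqrt(*norm);
  return 0;
}
#endif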
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatTranspose_MPIAIJ"
1347 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1348 {
1349   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1350   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1351   PetscErrorCode ierr;
1352   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1353   Mat            B;
1354   PetscScalar    *array;
1355 
1356   PetscFunctionBegin;
1357   if (!matout && M != N) {
1358     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1359   }
1360 
1361   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1362   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1363   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1364   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1365 
1366   /* copy over the A part */
1367   Aloc = (Mat_SeqAIJ*)a->A->data;
1368   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1369   row = a->rstart;
1370   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1371   for (i=0; i<m; i++) {
1372     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1373     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1374   }
1375   aj = Aloc->j;
1376   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1377 
1378   /* copy over the B part */
1379   Aloc = (Mat_SeqAIJ*)a->B->data;
1380   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1381   row  = a->rstart;
1382   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1383   ct   = cols;
1384   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1385   for (i=0; i<m; i++) {
1386     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1387     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1388   }
1389   ierr = PetscFree(ct);CHKERRQ(ierr);
1390   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   if (matout) {
1393     *matout = B;
1394   } else {
1395     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1396   }
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1402 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1403 {
1404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1405   Mat            a = aij->A,b = aij->B;
1406   PetscErrorCode ierr;
1407   PetscInt       s1,s2,s3;
1408 
1409   PetscFunctionBegin;
1410   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1411   if (rr) {
1412     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1413     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1414     /* Overlap communication with computation. */
1415     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1416   }
1417   if (ll) {
1418     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1419     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1420     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1421   }
1422   /* scale the diagonal block */
1423   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1424 
1425   if (rr) {
1426     /* Do a scatter end and then right scale the off-diagonal block */
1427     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1428     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1429   }
1430 
1431   PetscFunctionReturn(0);
1432 }
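
/*
   Usage sketch (illustrative only, not part of the library): the routine above implements
   MatDiagonalScale() for MPIAIJ, replacing the matrix by diag(ll) * A * diag(rr).  The left
   vector must conform to the local row layout and the right vector to the local column layout;
   the variable names below are hypothetical.

     MPI_Comm comm;
     Vec      ll,rr;
     PetscInt mloc,nloc;
     ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
     ierr = MatGetLocalSize(A,&mloc,&nloc);CHKERRQ(ierr);
     ierr = VecCreateMPI(comm,mloc,PETSC_DETERMINE,&ll);CHKERRQ(ierr);    left vector: row layout
     ierr = VecCreateMPI(comm,nloc,PETSC_DETERMINE,&rr);CHKERRQ(ierr);    right vector: column layout
     ierr = VecSet(ll,2.0);CHKERRQ(ierr);
     ierr = VecSet(rr,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
*/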
1433 
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1437 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1438 {
1439   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443   if (!a->rank) {
1444     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1445   }
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1451 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1452 {
1453   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1454   PetscErrorCode ierr;
1455 
1456   PetscFunctionBegin;
1457   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1458   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1459   PetscFunctionReturn(0);
1460 }
1461 #undef __FUNCT__
1462 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1463 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1464 {
1465   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1466   PetscErrorCode ierr;
1467 
1468   PetscFunctionBegin;
1469   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatEqual_MPIAIJ"
1475 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1476 {
1477   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1478   Mat            a,b,c,d;
1479   PetscTruth     flg;
1480   PetscErrorCode ierr;
1481 
1482   PetscFunctionBegin;
1483   a = matA->A; b = matA->B;
1484   c = matB->A; d = matB->B;
1485 
1486   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1487   if (flg) {
1488     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1489   }
1490   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatCopy_MPIAIJ"
1496 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1497 {
1498   PetscErrorCode ierr;
1499   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1500   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1501 
1502   PetscFunctionBegin;
1503   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1504   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1505     /* Because of the column compression in the off-processor part of the matrix a->B,
1506        the number of columns in a->B and b->B may be different; hence we cannot call
1507        MatCopy() directly on the two parts. If need be, we could provide a more
1508        efficient copy than MatCopy_Basic() by first uncompressing the a->B matrices
1509        and then copying the submatrices. */
1510     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1511   } else {
1512     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1513     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1520 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1521 {
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #include "petscblaslapack.h"
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatAXPY_MPIAIJ"
1532 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1533 {
1534   PetscErrorCode ierr;
1535   PetscInt       i;
1536   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1537   PetscBLASInt   bnz,one=1;
1538   Mat_SeqAIJ     *x,*y;
1539 
1540   PetscFunctionBegin;
1541   if (str == SAME_NONZERO_PATTERN) {
1542     PetscScalar alpha = a;
1543     x = (Mat_SeqAIJ *)xx->A->data;
1544     y = (Mat_SeqAIJ *)yy->A->data;
1545     bnz = (PetscBLASInt)x->nz;
1546     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1547     x = (Mat_SeqAIJ *)xx->B->data;
1548     y = (Mat_SeqAIJ *)yy->B->data;
1549     bnz = (PetscBLASInt)x->nz;
1550     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1551   } else if (str == SUBSET_NONZERO_PATTERN) {
1552     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
1553 
1554     x = (Mat_SeqAIJ *)xx->B->data;
1555     y = (Mat_SeqAIJ *)yy->B->data;
1556     if (y->xtoy && y->XtoY != xx->B) {
1557       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1558       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1559     }
1560     if (!y->xtoy) { /* get xtoy */
1561       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1562       y->XtoY = xx->B;
1563     }
1564     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
1565   } else {
1566     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1572 
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatConjugate_MPIAIJ"
1575 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1576 {
1577 #if defined(PETSC_USE_COMPLEX)
1578   PetscErrorCode ierr;
1579   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1580 
1581   PetscFunctionBegin;
1582   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1583   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1584 #else
1585   PetscFunctionBegin;
1586 #endif
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 /* -------------------------------------------------------------------*/
1591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1592        MatGetRow_MPIAIJ,
1593        MatRestoreRow_MPIAIJ,
1594        MatMult_MPIAIJ,
1595 /* 4*/ MatMultAdd_MPIAIJ,
1596        MatMultTranspose_MPIAIJ,
1597        MatMultTransposeAdd_MPIAIJ,
1598        0,
1599        0,
1600        0,
1601 /*10*/ 0,
1602        0,
1603        0,
1604        MatRelax_MPIAIJ,
1605        MatTranspose_MPIAIJ,
1606 /*15*/ MatGetInfo_MPIAIJ,
1607        MatEqual_MPIAIJ,
1608        MatGetDiagonal_MPIAIJ,
1609        MatDiagonalScale_MPIAIJ,
1610        MatNorm_MPIAIJ,
1611 /*20*/ MatAssemblyBegin_MPIAIJ,
1612        MatAssemblyEnd_MPIAIJ,
1613        0,
1614        MatSetOption_MPIAIJ,
1615        MatZeroEntries_MPIAIJ,
1616 /*25*/ MatZeroRows_MPIAIJ,
1617        0,
1618        0,
1619        0,
1620        0,
1621 /*30*/ MatSetUpPreallocation_MPIAIJ,
1622        0,
1623        0,
1624        0,
1625        0,
1626 /*35*/ MatDuplicate_MPIAIJ,
1627        0,
1628        0,
1629        0,
1630        0,
1631 /*40*/ MatAXPY_MPIAIJ,
1632        MatGetSubMatrices_MPIAIJ,
1633        MatIncreaseOverlap_MPIAIJ,
1634        MatGetValues_MPIAIJ,
1635        MatCopy_MPIAIJ,
1636 /*45*/ MatPrintHelp_MPIAIJ,
1637        MatScale_MPIAIJ,
1638        0,
1639        0,
1640        0,
1641 /*50*/ MatSetBlockSize_MPIAIJ,
1642        0,
1643        0,
1644        0,
1645        0,
1646 /*55*/ MatFDColoringCreate_MPIAIJ,
1647        0,
1648        MatSetUnfactored_MPIAIJ,
1649        0,
1650        0,
1651 /*60*/ MatGetSubMatrix_MPIAIJ,
1652        MatDestroy_MPIAIJ,
1653        MatView_MPIAIJ,
1654        MatGetPetscMaps_Petsc,
1655        0,
1656 /*65*/ 0,
1657        0,
1658        0,
1659        0,
1660        0,
1661 /*70*/ 0,
1662        0,
1663        MatSetColoring_MPIAIJ,
1664 #if defined(PETSC_HAVE_ADIC)
1665        MatSetValuesAdic_MPIAIJ,
1666 #else
1667        0,
1668 #endif
1669        MatSetValuesAdifor_MPIAIJ,
1670 /*75*/ 0,
1671        0,
1672        0,
1673        0,
1674        0,
1675 /*80*/ 0,
1676        0,
1677        0,
1678        0,
1679 /*84*/ MatLoad_MPIAIJ,
1680        0,
1681        0,
1682        0,
1683        0,
1684        0,
1685 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1686        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1687        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1688        MatPtAP_Basic,
1689        MatPtAPSymbolic_MPIAIJ,
1690 /*95*/ MatPtAPNumeric_MPIAIJ,
1691        0,
1692        0,
1693        0,
1694        0,
1695 /*100*/0,
1696        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1697        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1698        MatConjugate_MPIAIJ
1699 };
1700 
1701 /* ----------------------------------------------------------------------------------------*/
1702 
1703 EXTERN_C_BEGIN
1704 #undef __FUNCT__
1705 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1706 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1707 {
1708   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1713   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1714   PetscFunctionReturn(0);
1715 }
1716 EXTERN_C_END
1717 
1718 EXTERN_C_BEGIN
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1721 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1722 {
1723   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1724   PetscErrorCode ierr;
1725 
1726   PetscFunctionBegin;
1727   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1728   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1729   PetscFunctionReturn(0);
1730 }
1731 EXTERN_C_END
1732 
1733 #include "petscpc.h"
1734 EXTERN_C_BEGIN
1735 #undef __FUNCT__
1736 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1737 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1738 {
1739   Mat_MPIAIJ     *b;
1740   PetscErrorCode ierr;
1741   PetscInt       i;
1742 
1743   PetscFunctionBegin;
1744   B->preallocated = PETSC_TRUE;
1745   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1746   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1747   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1748   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1749   if (d_nnz) {
1750     for (i=0; i<B->m; i++) {
1751       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1752     }
1753   }
1754   if (o_nnz) {
1755     for (i=0; i<B->m; i++) {
1756       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1757     }
1758   }
1759   b = (Mat_MPIAIJ*)B->data;
1760   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1761   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1762 
1763   PetscFunctionReturn(0);
1764 }
1765 EXTERN_C_END
1766 
1767 #undef __FUNCT__
1768 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1769 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1770 {
1771   Mat            mat;
1772   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1773   PetscErrorCode ierr;
1774 
1775   PetscFunctionBegin;
1776   *newmat       = 0;
1777   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1778   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1779   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1780   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1781   a    = (Mat_MPIAIJ*)mat->data;
1782 
1783   mat->factor       = matin->factor;
1784   mat->bs           = matin->bs;
1785   mat->assembled    = PETSC_TRUE;
1786   mat->insertmode   = NOT_SET_VALUES;
1787   mat->preallocated = PETSC_TRUE;
1788 
1789   a->rstart       = oldmat->rstart;
1790   a->rend         = oldmat->rend;
1791   a->cstart       = oldmat->cstart;
1792   a->cend         = oldmat->cend;
1793   a->size         = oldmat->size;
1794   a->rank         = oldmat->rank;
1795   a->donotstash   = oldmat->donotstash;
1796   a->roworiented  = oldmat->roworiented;
1797   a->rowindices   = 0;
1798   a->rowvalues    = 0;
1799   a->getrowactive = PETSC_FALSE;
1800 
1801   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1802   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1803   if (oldmat->colmap) {
1804 #if defined (PETSC_USE_CTABLE)
1805     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1806 #else
1807     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1808     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1809     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1810 #endif
1811   } else a->colmap = 0;
1812   if (oldmat->garray) {
1813     PetscInt len;
1814     len  = oldmat->B->n;
1815     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1816     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1817     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1818   } else a->garray = 0;
1819 
1820   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1821   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1822   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1823   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1824   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1825   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1826   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1827   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1828   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1829   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1830   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1831   *newmat = mat;
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #include "petscsys.h"
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatLoad_MPIAIJ"
1839 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1840 {
1841   Mat            A;
1842   PetscScalar    *vals,*svals;
1843   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1844   MPI_Status     status;
1845   PetscErrorCode ierr;
1846   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1847   PetscInt       i,nz,j,rstart,rend,mmax;
1848   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1849   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1850   PetscInt       cend,cstart,n,*rowners;
1851   int            fd;
1852 
1853   PetscFunctionBegin;
1854   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1855   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1856   if (!rank) {
1857     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1858     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1859     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1860   }
1861 
1862   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1863   M = header[1]; N = header[2];
1864   /* determine ownership of all rows */
1865   m    = M/size + ((M % size) > rank);
1866   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1867   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1868 
1869   /* First process needs enough room for process with most rows */
1870   if (!rank) {
1871     mmax       = rowners[1];
1872     for (i=2; i<size; i++) {
1873       mmax = PetscMax(mmax,rowners[i]);
1874     }
1875   } else mmax = m;
1876 
1877   rowners[0] = 0;
1878   for (i=2; i<=size; i++) {
1879     mmax       = PetscMax(mmax,rowners[i]);
1880     rowners[i] += rowners[i-1];
1881   }
1882   rstart = rowners[rank];
1883   rend   = rowners[rank+1];
1884 
1885   /* distribute row lengths to all processors */
1886   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1887   if (!rank) {
1888     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1889     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1890     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1891     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1892     for (j=0; j<m; j++) {
1893       procsnz[0] += ourlens[j];
1894     }
1895     for (i=1; i<size; i++) {
1896       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1897       /* calculate the number of nonzeros on each processor */
1898       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1899         procsnz[i] += rowlengths[j];
1900       }
1901       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1902     }
1903     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1904   } else {
1905     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1906   }
1907 
1908   if (!rank) {
1909     /* determine max buffer needed and allocate it */
1910     maxnz = 0;
1911     for (i=0; i<size; i++) {
1912       maxnz = PetscMax(maxnz,procsnz[i]);
1913     }
1914     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1915 
1916     /* read in my part of the matrix column indices  */
1917     nz   = procsnz[0];
1918     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1919     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1920 
1921     /* read in everyone else's and ship them off */
1922     for (i=1; i<size; i++) {
1923       nz   = procsnz[i];
1924       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1925       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1926     }
1927     ierr = PetscFree(cols);CHKERRQ(ierr);
1928   } else {
1929     /* determine buffer space needed for message */
1930     nz = 0;
1931     for (i=0; i<m; i++) {
1932       nz += ourlens[i];
1933     }
1934     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1935 
1936     /* receive message of column indices */
1937     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1938     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1939     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1940   }
1941 
1942   /* determine column ownership if matrix is not square */
1943   if (N != M) {
1944     n      = N/size + ((N % size) > rank);
1945     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1946     cstart = cend - n;
1947   } else {
1948     cstart = rstart;
1949     cend   = rend;
1950     n      = cend - cstart;
1951   }
1952 
1953   /* loop over local rows, determining number of off diagonal entries */
1954   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1955   jj = 0;
1956   for (i=0; i<m; i++) {
1957     for (j=0; j<ourlens[i]; j++) {
1958       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1959       jj++;
1960     }
1961   }
1962 
1963   /* create our matrix */
1964   for (i=0; i<m; i++) {
1965     ourlens[i] -= offlens[i];
1966   }
1967   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1968   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
1969   ierr = MatSetType(A,type);CHKERRQ(ierr);
1970   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1971 
1972   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1973   for (i=0; i<m; i++) {
1974     ourlens[i] += offlens[i];
1975   }
1976 
1977   if (!rank) {
1978     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1979 
1980     /* read in my part of the matrix numerical values  */
1981     nz   = procsnz[0];
1982     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1983 
1984     /* insert into matrix */
1985     jj      = rstart;
1986     smycols = mycols;
1987     svals   = vals;
1988     for (i=0; i<m; i++) {
1989       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1990       smycols += ourlens[i];
1991       svals   += ourlens[i];
1992       jj++;
1993     }
1994 
1995     /* read in other processors and ship out */
1996     for (i=1; i<size; i++) {
1997       nz   = procsnz[i];
1998       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1999       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2000     }
2001     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2002   } else {
2003     /* receive numeric values */
2004     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2005 
2006     /* receive message of values */
2007     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2008     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2009     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2010 
2011     /* insert into matrix */
2012     jj      = rstart;
2013     smycols = mycols;
2014     svals   = vals;
2015     for (i=0; i<m; i++) {
2016       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2017       smycols += ourlens[i];
2018       svals   += ourlens[i];
2019       jj++;
2020     }
2021   }
2022   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2023   ierr = PetscFree(vals);CHKERRQ(ierr);
2024   ierr = PetscFree(mycols);CHKERRQ(ierr);
2025   ierr = PetscFree(rowners);CHKERRQ(ierr);
2026 
2027   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2028   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   *newmat = A;
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2035 /*
2036     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
2037   on each process, and then the final result by concatenating the local matrices.
2038   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
2039 */
2040 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2041 {
2042   PetscErrorCode ierr;
2043   PetscMPIInt    rank,size;
2044   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2045   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2046   Mat            *local,M,Mreuse;
2047   PetscScalar    *vwork,*aa;
2048   MPI_Comm       comm = mat->comm;
2049   Mat_SeqAIJ     *aij;
2050 
2051 
2052   PetscFunctionBegin;
2053   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2054   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2055 
2056   if (call ==  MAT_REUSE_MATRIX) {
2057     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2058     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2059     local = &Mreuse;
2060     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2061   } else {
2062     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2063     Mreuse = *local;
2064     ierr   = PetscFree(local);CHKERRQ(ierr);
2065   }
2066 
2067   /*
2068       m - number of local rows
2069       n - number of columns (same on all processors)
2070       rstart - first row in new global matrix generated
2071   */
2072   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2073   if (call == MAT_INITIAL_MATRIX) {
2074     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2075     ii  = aij->i;
2076     jj  = aij->j;
2077 
2078     /*
2079         Determine the number of non-zeros in the diagonal and off-diagonal
2080         portions of the matrix in order to do correct preallocation
2081     */
2082 
2083     /* first get start and end of "diagonal" columns */
2084     if (csize == PETSC_DECIDE) {
2085       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2086       if (mglobal == n) { /* square matrix */
2087 	nlocal = m;
2088       } else {
2089         nlocal = n/size + ((n % size) > rank);
2090       }
2091     } else {
2092       nlocal = csize;
2093     }
2094     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2095     rstart = rend - nlocal;
2096     if (rank == size - 1 && rend != n) {
2097       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2098     }
2099 
2100     /* next, compute all the lengths */
2101     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2102     olens = dlens + m;
2103     for (i=0; i<m; i++) {
2104       jend = ii[i+1] - ii[i];
2105       olen = 0;
2106       dlen = 0;
2107       for (j=0; j<jend; j++) {
2108         if (*jj < rstart || *jj >= rend) olen++;
2109         else dlen++;
2110         jj++;
2111       }
2112       olens[i] = olen;
2113       dlens[i] = dlen;
2114     }
2115     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2116     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2117     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2118     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2119     ierr = PetscFree(dlens);CHKERRQ(ierr);
2120   } else {
2121     PetscInt ml,nl;
2122 
2123     M = *newmat;
2124     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2125     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2126     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2127     /*
2128          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2129        rather than the slower MatSetValues().
2130     */
2131     M->was_assembled = PETSC_TRUE;
2132     M->assembled     = PETSC_FALSE;
2133   }
2134   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2135   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2136   ii  = aij->i;
2137   jj  = aij->j;
2138   aa  = aij->a;
2139   for (i=0; i<m; i++) {
2140     row   = rstart + i;
2141     nz    = ii[i+1] - ii[i];
2142     cwork = jj;     jj += nz;
2143     vwork = aa;     aa += nz;
2144     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2145   }
2146 
2147   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2149   *newmat = M;
2150 
2151   /* save submatrix used in processor for next request */
2152   if (call ==  MAT_INITIAL_MATRIX) {
2153     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2154     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2155   }
2156 
2157   PetscFunctionReturn(0);
2158 }
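
/*
   Usage sketch (illustrative only, not part of the library): calling sequence of the routine
   implemented above.  Each process supplies index sets of the global rows and columns it wants
   in the new parallel submatrix; nlocalrows, firstrow, nlocalcols and firstcol are hypothetical.

     IS  isrow,iscol;
     Mat sub;
     ierr = ISCreateStride(comm,nlocalrows,firstrow,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(comm,nlocalcols,firstcol,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     (later, after the values of mat change but the pattern stays the same, reuse the submatrix)
     ierr = MatGetSubMatrix(mat,isrow,iscol,PETSC_DECIDE,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/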
2159 
2160 EXTERN_C_BEGIN
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2163 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2164 {
2165   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2166   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2167   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2168   const PetscInt *JJ;
2169   PetscScalar    *values;
2170   PetscErrorCode ierr;
2171 
2172   PetscFunctionBegin;
2173 #if defined(PETSC_OPT_g)
2174   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2175 #endif
2176   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2177   o_nnz = d_nnz + m;
2178 
2179   for (i=0; i<m; i++) {
2180     nnz     = I[i+1]- I[i];
2181     JJ      = J + I[i];
2182     nnz_max = PetscMax(nnz_max,nnz);
2183 #if defined(PETSC_OPT_g)
2184     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2185 #endif
2186     for (j=0; j<nnz; j++) {
2187       if (*JJ >= cstart) break;
2188       JJ++;
2189     }
2190     d = 0;
2191     for (; j<nnz; j++) {
2192       if (*JJ++ >= cend) break;
2193       d++;
2194     }
2195     d_nnz[i] = d;
2196     o_nnz[i] = nnz - d;
2197   }
2198   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2199   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2200 
2201   if (v) values = (PetscScalar*)v;
2202   else {
2203     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2204     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2205   }
2206 
2207   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2208   for (i=0; i<m; i++) {
2209     ii   = i + rstart;
2210     nnz  = I[i+1]- I[i];
2211     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2212   }
2213   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2214   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2215   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2216 
2217   if (!v) {
2218     ierr = PetscFree(values);CHKERRQ(ierr);
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 EXTERN_C_END
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2226 /*@C
2227    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2228    (the default parallel PETSc format).
2229 
2230    Collective on MPI_Comm
2231 
2232    Input Parameters:
2233 +  A - the matrix
2234 .  i - the indices into j for the start of each local row (starts with zero)
2235 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2236 -  v - optional values in the matrix
2237 
2238    Level: developer
2239 
2240 .keywords: matrix, aij, compressed row, sparse, parallel
2241 
2242 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2247 
2248   PetscFunctionBegin;
2249   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2250   if (f) {
2251     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
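
/*
   Usage sketch (illustrative only, not part of the library): preallocating and filling an MPIAIJ
   matrix from local CSR arrays with the routine above.  i[], j[] and v[] describe only the locally
   owned rows; i[] starts at 0 and the column indices of each row must be sorted.  Here every
   process owns 2 rows of an 8-column matrix; all names and values are hypothetical.

     Mat         B;
     PetscInt    i[] = {0,2,4};                  i[k] is the offset of local row k in j[] and v[]
     PetscInt    j[] = {0,5,1,6};                global column indices, sorted within each row
     PetscScalar v[] = {1.0,2.0,3.0,4.0};
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,PETSC_DECIDE,PETSC_DETERMINE,8);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/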
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2258 /*@C
2259    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2260    (the default parallel PETSc format).  For good matrix assembly performance
2261    the user should preallocate the matrix storage by setting the parameters
2262    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2263    performance can be increased by more than a factor of 50.
2264 
2265    Collective on MPI_Comm
2266 
2267    Input Parameters:
2268 +  A - the matrix
2269 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2270            (same value is used for all local rows)
2271 .  d_nnz - array containing the number of nonzeros in the various rows of the
2272            DIAGONAL portion of the local submatrix (possibly different for each row)
2273            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2274            The size of this array is equal to the number of local rows, i.e 'm'.
2275            You must leave room for the diagonal entry even if it is zero.
2276 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2277            submatrix (same value is used for all local rows).
2278 -  o_nnz - array containing the number of nonzeros in the various rows of the
2279            OFF-DIAGONAL portion of the local submatrix (possibly different for
2280            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2281            structure. The size of this array is equal to the number
2282            of local rows, i.e 'm'.
2283 
2284    If the *_nnz parameter is given then the *_nz parameter is ignored
2285 
2286    The AIJ format (also called the Yale sparse matrix format or
2287    compressed row storage (CSR)), is fully compatible with standard Fortran 77
2288    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2289 
2290    The parallel matrix is partitioned such that the first m0 rows belong to
2291    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2292    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2293 
2294    The DIAGONAL portion of the local submatrix of a processor can be defined
2295    as the submatrix which is obtained by extracting the part corresponding
2296    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2297    first row that belongs to the processor, and r2 is the last row belonging
2298    to this processor. This is a square mxm matrix. The remaining portion
2299    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2300 
2301    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2302 
2303    Example usage:
2304 
2305    Consider the following 8x8 matrix with 34 non-zero values that is
2306    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2307    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2308    as follows:
2309 
2310 .vb
2311             1  2  0  |  0  3  0  |  0  4
2312     Proc0   0  5  6  |  7  0  0  |  8  0
2313             9  0 10  | 11  0  0  | 12  0
2314     -------------------------------------
2315            13  0 14  | 15 16 17  |  0  0
2316     Proc1   0 18  0  | 19 20 21  |  0  0
2317             0  0  0  | 22 23  0  | 24  0
2318     -------------------------------------
2319     Proc2  25 26 27  |  0  0 28  | 29  0
2320            30  0  0  | 31 32 33  |  0 34
2321 .ve
2322 
2323    This can be represented as a collection of submatrices as:
2324 
2325 .vb
2326       A B C
2327       D E F
2328       G H I
2329 .ve
2330 
2331    Where the submatrices A,B,C are owned by proc0, D,E,F are
2332    owned by proc1, G,H,I are owned by proc2.
2333 
2334    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2335    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2336    The 'M','N' parameters are 8,8, and have the same values on all procs.
2337 
2338    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2339    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2340    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2341    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2342    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2343    matrix, and [DF] as another SeqAIJ matrix.
2344 
2345    When d_nz, o_nz parameters are specified, d_nz storage elements are
2346    allocated for every row of the local diagonal submatrix, and o_nz
2347    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2348    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2349    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2350    In this case, the values of d_nz,o_nz are:
2351 .vb
2352      proc0 : d_nz = 2, o_nz = 2
2353      proc1 : d_nz = 3, o_nz = 2
2354      proc2 : d_nz = 1, o_nz = 4
2355 .ve
2356    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2357    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2358    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2359    34 values.
2360 
2361    When d_nnz, o_nnz parameters are specified, the storage is specified
2362    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2363    In the above case the values for d_nnz,o_nnz are:
2364 .vb
2365      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2366      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2367      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2368 .ve
2369    Here the space allocated is the sum of all the above values, i.e. 34, and
2370    hence the preallocation is perfect.
2371 
2372    Level: intermediate
2373 
2374 .keywords: matrix, aij, compressed row, sparse, parallel
2375 
2376 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2377           MPIAIJ
2378 @*/
2379 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2380 {
2381   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2382 
2383   PetscFunctionBegin;
2384   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2385   if (f) {
2386     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2387   }
2388   PetscFunctionReturn(0);
2389 }
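
/*
   Usage sketch (illustrative only, not part of the library), following the 8x8 example in the
   manual page above: on proc0, which owns rows 0-2, the per-row preallocation would be

     Mat      A;
     PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};   proc0 values from the example; proc1 and proc2 differ
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);     local sizes are 3,3 on proc0 and proc1, 2,2 on proc2
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/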
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatCreateMPIAIJ"
2393 /*@C
2394    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2395    (the default parallel PETSc format).  For good matrix assembly performance
2396    the user should preallocate the matrix storage by setting the parameters
2397    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2398    performance can be increased by more than a factor of 50.
2399 
2400    Collective on MPI_Comm
2401 
2402    Input Parameters:
2403 +  comm - MPI communicator
2404 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2405            This value should be the same as the local size used in creating the
2406            y vector for the matrix-vector product y = Ax.
2407 .  n - This value should be the same as the local size used in creating the
2408        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2409        calculated if N is given) For square matrices n is almost always m.
2410 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2411 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2412 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2413            (same value is used for all local rows)
2414 .  d_nnz - array containing the number of nonzeros in the various rows of the
2415            DIAGONAL portion of the local submatrix (possibly different for each row)
2416            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2417            The size of this array is equal to the number of local rows, i.e 'm'.
2418            You must leave room for the diagonal entry even if it is zero.
2419 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2420            submatrix (same value is used for all local rows).
2421 -  o_nnz - array containing the number of nonzeros in the various rows of the
2422            OFF-DIAGONAL portion of the local submatrix (possibly different for
2423            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2424            structure. The size of this array is equal to the number
2425            of local rows, i.e 'm'.
2426 
2427    Output Parameter:
2428 .  A - the matrix
2429 
2430    Notes:
2431    If the *_nnz parameter is given then the *_nz parameter is ignored
2432 
2433    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2434    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2435    storage requirements for this matrix.
2436 
2437    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2438    processor then it must be used on all processors that share the object for
2439    that argument.
2440 
2441    The user MUST specify either the local or global matrix dimensions
2442    (possibly both).
2443 
2444    The parallel matrix is partitioned such that the first m0 rows belong to
2445    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2446    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm'.
2447 
2448    The DIAGONAL portion of the local submatrix of a processor can be defined
2449    as the submatrix which is obtained by extracting the part corresponding
2450    to the rows r1-r2 and columns r1-r2 of the global matrix, where r1 is the
2451    first row that belongs to the processor, and r2 is the last row belonging
2452    to this processor. This is a square mxm matrix. The remaining portion
2453    of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.
2454 
2455    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
2456 
2457    When calling this routine with a single process communicator, a matrix of
2458    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2459    type of communicator, use the construction mechanism:
2460      MatCreate(...,&A); MatSetType(A,MPIAIJ); MatMPIAIJSetPreallocation(A,...);
2461 
2462    By default, this format uses inodes (identical nodes) when possible.
2463    We search for consecutive rows with the same nonzero structure, thereby
2464    reusing matrix information to achieve increased efficiency.
2465 
2466    Options Database Keys:
2467 +  -mat_no_inode  - Do not use inodes
2468 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2469 -  -mat_aij_oneindex - Internally use indexing starting at 1
2470         rather than 0.  Note that when calling MatSetValues(),
2471         the user still MUST index entries starting at 0!
2472 
2473 
2474    Example usage:
2475 
2476    Consider the following 8x8 matrix with 34 non-zero values that is
2477    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2478    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2479    as follows:
2480 
2481 .vb
2482             1  2  0  |  0  3  0  |  0  4
2483     Proc0   0  5  6  |  7  0  0  |  8  0
2484             9  0 10  | 11  0  0  | 12  0
2485     -------------------------------------
2486            13  0 14  | 15 16 17  |  0  0
2487     Proc1   0 18  0  | 19 20 21  |  0  0
2488             0  0  0  | 22 23  0  | 24  0
2489     -------------------------------------
2490     Proc2  25 26 27  |  0  0 28  | 29  0
2491            30  0  0  | 31 32 33  |  0 34
2492 .ve
2493 
2494    This can be represented as a collection of submatrices as:
2495 
2496 .vb
2497       A B C
2498       D E F
2499       G H I
2500 .ve
2501 
2502    Where the submatrices A,B,C are owned by proc0, D,E,F are
2503    owned by proc1, G,H,I are owned by proc2.
2504 
2505    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2506    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2507    The 'M','N' parameters are 8,8, and have the same values on all procs.
2508 
2509    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2510    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2511    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2512    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
2513    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
2514    matrix, and [DF] as another SeqAIJ matrix.
2515 
2516    When d_nz, o_nz parameters are specified, d_nz storage elements are
2517    allocated for every row of the local diagonal submatrix, and o_nz
2518    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2519    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
2520    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
2521    In this case, the values of d_nz,o_nz are:
2522 .vb
2523      proc0 : d_nz = 2, o_nz = 2
2524      proc1 : d_nz = 3, o_nz = 2
2525      proc2 : d_nz = 1, o_nz = 4
2526 .ve
2527    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2528    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2529    for proc2, i.e., we are using 12+15+10=37 storage locations to store
2530    34 values.
2531 
2532    When d_nnz, o_nnz parameters are specified, the storage is specified
2533    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2534    In the above case the values for d_nnz,o_nnz are:
2535 .vb
2536      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2537      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2538      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2539 .ve
2540    Here the space allocated is the sum of all the above values, i.e. 34, and
2541    hence the preallocation is perfect.
2542 
2543    Level: intermediate
2544 
2545 .keywords: matrix, aij, compressed row, sparse, parallel
2546 
2547 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2548           MPIAIJ
2549 @*/
2550 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2551 {
2552   PetscErrorCode ierr;
2553   PetscMPIInt    size;
2554 
2555   PetscFunctionBegin;
2556   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2557   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2558   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2559   if (size > 1) {
2560     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2561     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2562   } else {
2563     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2564     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2565   }
2566   PetscFunctionReturn(0);
2567 }
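
/*
   Usage sketch (illustrative only, not part of the library): creating and assembling the 8x8
   example matrix from the manual page above, letting PETSc decide the local sizes and using the
   scalar d_nz/o_nz preallocation.  ncols, cols and vals stand for the contents of each global row
   and are hypothetical placeholders.

     Mat      A;
     PetscInt rstart,rend,i;
     ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,8,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/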
2568 
2569 #undef __FUNCT__
2570 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2571 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2572 {
2573   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2574 
2575   PetscFunctionBegin;
2576   *Ad     = a->A;
2577   *Ao     = a->B;
2578   *colmap = a->garray;
2579   PetscFunctionReturn(0);
2580 }
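
/*
   Usage sketch (illustrative only, not part of the library): the routine above gives access to
   the two sequential blocks that make up an MPIAIJ matrix.  Ad is the diagonal block, Ao the
   off-diagonal block, and colmap maps the compressed local columns of Ao to global column numbers.
   The blocks remain owned by A and must not be destroyed by the caller.

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/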
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2584 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2585 {
2586   PetscErrorCode ierr;
2587   PetscInt       i;
2588   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2589 
2590   PetscFunctionBegin;
2591   if (coloring->ctype == IS_COLORING_LOCAL) {
2592     ISColoringValue *allcolors,*colors;
2593     ISColoring      ocoloring;
2594 
2595     /* set coloring for diagonal portion */
2596     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2597 
2598     /* set coloring for off-diagonal portion */
2599     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2600     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2601     for (i=0; i<a->B->n; i++) {
2602       colors[i] = allcolors[a->garray[i]];
2603     }
2604     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2605     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2606     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2607     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2608   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2609     ISColoringValue *colors;
2610     PetscInt             *larray;
2611     ISColoring      ocoloring;
2612 
2613     /* set coloring for diagonal portion */
2614     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2615     for (i=0; i<a->A->n; i++) {
2616       larray[i] = i + a->cstart;
2617     }
2618     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2619     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2620     for (i=0; i<a->A->n; i++) {
2621       colors[i] = coloring->colors[larray[i]];
2622     }
2623     ierr = PetscFree(larray);CHKERRQ(ierr);
2624     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2625     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2626     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2627 
2628     /* set coloring for off-diagonal portion */
2629     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2630     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2631     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2632     for (i=0; i<a->B->n; i++) {
2633       colors[i] = coloring->colors[larray[i]];
2634     }
2635     ierr = PetscFree(larray);CHKERRQ(ierr);
2636     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2637     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2638     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2639   } else {
2640     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2641   }
2642 
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 #if defined(PETSC_HAVE_ADIC)
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2649 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2650 {
2651   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2652   PetscErrorCode ierr;
2653 
2654   PetscFunctionBegin;
2655   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2656   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2657   PetscFunctionReturn(0);
2658 }
2659 #endif
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2663 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2664 {
2665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2666   PetscErrorCode ierr;
2667 
2668   PetscFunctionBegin;
2669   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2670   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatMerge"
2676 /*@C
2677       MatMerge - Creates a single large PETSc matrix by concatenating sequential
2678                  matrices from each processor
2679 
2680     Collective on MPI_Comm
2681 
2682    Input Parameters:
2683 +    comm - the communicator the parallel matrix will live on
2684 .    inmat - the input sequential matrices
2685 .    n - number of local columns (or PETSC_DECIDE)
2686 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2687 
2688    Output Parameter:
2689 .    outmat - the parallel matrix generated
2690 
2691     Level: advanced
2692 
2693    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2694 
2695 @*/
2696 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2697 {
2698   PetscErrorCode ierr;
2699   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2700   PetscInt       *indx;
2701   PetscScalar    *values;
2702   PetscMap       columnmap,rowmap;
2703 
2704   PetscFunctionBegin;
2705   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2706   /*
2707   PetscMPIInt       rank;
2708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2709   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2710   */
2711   if (scall == MAT_INITIAL_MATRIX){
2712     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2713     if (n == PETSC_DECIDE){
2714       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2715       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2716       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2717       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2718       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2719     }
2720 
2721     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2722     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2723     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2724     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2725     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2726 
2727     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2728     for (i=0;i<m;i++) {
2729       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2730       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2731       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2732     }
2733     /* This routine will ONLY return MPIAIJ type matrix */
2734     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2735     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2736     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2737     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2738     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2739 
2740   } else if (scall == MAT_REUSE_MATRIX){
2741     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2742   } else {
2743     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2744   }
2745 
2746   for (i=0;i<m;i++) {
2747     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2748     I    = i + rstart;
2749     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2750     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2751   }
2752   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2753   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755 
2756   PetscFunctionReturn(0);
2757 }
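
/*
   Usage sketch (illustrative only, not part of the library): combining one SeqAIJ matrix per
   process into a single MPIAIJ matrix with MatMerge() above.  Each process contributes its local
   rows; every process must use the same number of columns, and MatMerge() destroys the input
   matrix.  seqA stands for an already assembled SeqAIJ matrix and is hypothetical.

     Mat mpiA;
     ierr = MatMerge(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpiA);CHKERRQ(ierr);
*/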
2758 
2759 #undef __FUNCT__
2760 #define __FUNCT__ "MatFileSplit"
2761 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2762 {
2763   PetscErrorCode    ierr;
2764   PetscMPIInt       rank;
2765   PetscInt          m,N,i,rstart,nnz;
2766   size_t            len;
2767   const PetscInt    *indx;
2768   PetscViewer       out;
2769   char              *name;
2770   Mat               B;
2771   const PetscScalar *values;
2772 
2773   PetscFunctionBegin;
2774   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2775   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2776   /* Should this be the type of the diagonal block of A? */
2777   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2778   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2779   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2780   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2781   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2782   for (i=0;i<m;i++) {
2783     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2784     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2785     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2786   }
2787   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2788   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2789 
2790   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2791   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2792   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
2793   sprintf(name,"%s.%d",outfile,rank);
2794   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2795   ierr = PetscFree(name);CHKERRQ(ierr);
2796   ierr = MatView(B,out);CHKERRQ(ierr);
2797   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2798   ierr = MatDestroy(B);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2805 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2806 {
2807   PetscErrorCode       ierr;
2808   Mat_Merge_SeqsToMPI  *merge;
2809   PetscObjectContainer container;
2810 
2811   PetscFunctionBegin;
2812   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2813   if (container) {
2814     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2815     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2816     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2817     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2818     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2819     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2820     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2821     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2822     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2823     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2824     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2825     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2826 
2827     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2828     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2829     ierr = PetscFree(merge);CHKERRQ(ierr); /* merge is only valid when the container was found */
2830   }
2831 
2832   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 
2836 #include "src/mat/utils/freespace.h"
2837 #include "petscbt.h"
2838 static PetscEvent logkey_seqstompinum = 0;
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2841 /*@C
2842       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding sequential
2843                  matrices from each processor
2844 
2845     Collective on MPI_Comm
2846 
2847    Input Parameters:
2848 +    comm - the communicator the parallel matrix will live on
2849 .    seqmat - the input sequential matrices
2850 .    m - number of local rows (or PETSC_DECIDE)
2851 .    n - number of local columns (or PETSC_DECIDE)
2852 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2853 
2854    Output Parameter:
2855 .    mpimat - the parallel matrix generated
2856 
2857     Level: advanced
2858 
2859    Notes:
2860      The dimensions of the sequential matrix on each processor MUST be the same.
2861      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
2862      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
2863 @*/
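/*
   A minimal usage sketch, assuming each process has already assembled its own SeqAIJ
   contribution seqmat (with global column indices); the names are illustrative only:

     Mat seqmat,mpimat;
     // ... each process fills and assembles seqmat, a MATSEQAIJ matrix of the full global size ...
     ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     // mpimat now holds the sum of all the per-process contributions
     ierr = MatDestroy(mpimat);CHKERRQ(ierr);   // also releases the merge data attached to mpimat

   For repeated merges with an unchanged nonzero pattern, call MatMerge_SeqsToMPISymbolic()
   once and MatMerge_SeqsToMPINumeric() for each new set of values, or pass MAT_REUSE_MATRIX
   to MatMerge_SeqsToMPI().
*/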
2864 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2865 {
2866   PetscErrorCode       ierr;
2867   MPI_Comm             comm=mpimat->comm;
2868   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2869   PetscMPIInt          size,rank,taga,*len_s;
2870   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2871   PetscInt             proc,m;
2872   PetscInt             **buf_ri,**buf_rj;
2873   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2874   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2875   MPI_Request          *s_waits,*r_waits;
2876   MPI_Status           *status;
2877   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2878   Mat_Merge_SeqsToMPI  *merge;
2879   PetscObjectContainer container;
2880 
2881   PetscFunctionBegin;
2882   if (!logkey_seqstompinum) {
2883     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2884   }
2885   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2886 
2887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2888   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2889 
2890   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2891   if (container) {
2892     ierr  = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2893   } else SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat does not have an attached MatMergeSeqsToMPI container; call MatMerge_SeqsToMPISymbolic() first");
2894   bi     = merge->bi;
2895   bj     = merge->bj;
2896   buf_ri = merge->buf_ri;
2897   buf_rj = merge->buf_rj;
2898 
2899   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2900   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2901   len_s  = merge->len_s;
2902 
2903   /* send and recv matrix values */
2904   /*-----------------------------*/
2905   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2906   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2907 
2908   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2909   for (proc=0,k=0; proc<size; proc++){
2910     if (!len_s[proc]) continue;
2911     i = owners[proc];
2912     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2913     k++;
2914   }
2915 
2916   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2917   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2918   ierr = PetscFree(status);CHKERRQ(ierr);
2919 
2920   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2921   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2922 
2923   /* insert mat values of mpimat */
2924   /*----------------------------*/
2925   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2926   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2927   nextrow = buf_ri_k + merge->nrecv;
2928   nextai  = nextrow + merge->nrecv;
2929 
2930   for (k=0; k<merge->nrecv; k++){
2931     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
2932     nrows = *(buf_ri_k[k]);
2933     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
2934     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
2935   }
2936 
2937   /* set values of ba */
2938   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2939   for (i=0; i<m; i++) {
2940     arow = owners[rank] + i;
2941     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2942     bnzi = bi[i+1] - bi[i];
2943     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2944 
2945     /* add local non-zero vals of this proc's seqmat into ba */
2946     anzi = ai[arow+1] - ai[arow];
2947     aj   = a->j + ai[arow];
2948     aa   = a->a + ai[arow];
2949     nextaj = 0;
2950     for (j=0; nextaj<anzi; j++){
2951       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2952         ba_i[j] += aa[nextaj++];
2953       }
2954     }
2955 
2956     /* add received vals into ba */
2957     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2958       /* i-th row */
2959       if (i == *nextrow[k]) {
2960         anzi = *(nextai[k]+1) - *nextai[k];
2961         aj   = buf_rj[k] + *(nextai[k]);
2962         aa   = abuf_r[k] + *(nextai[k]);
2963         nextaj = 0;
2964         for (j=0; nextaj<anzi; j++){
2965           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2966             ba_i[j] += aa[nextaj++];
2967           }
2968         }
2969         nextrow[k]++; nextai[k]++;
2970       }
2971     }
2972     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2973   }
2974   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976 
2977   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2978   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2979   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2980   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 static PetscEvent logkey_seqstompisym = 0;
2985 #undef __FUNCT__
2986 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2987 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2988 {
2989   PetscErrorCode       ierr;
2990   Mat                  B_mpi;
2991   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2992   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
2993   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
2994   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
2995   PetscInt             len,proc,*dnz,*onz;
2996   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
2997   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
2998   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
2999   MPI_Status           *status;
3000   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3001   PetscBT              lnkbt;
3002   Mat_Merge_SeqsToMPI  *merge;
3003   PetscObjectContainer container;
3004 
3005   PetscFunctionBegin;
3006   if (!logkey_seqstompisym) {
3007     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3008   }
3009   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3010 
3011   /* make sure it is a PETSc comm */
3012   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3013   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3014   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3015 
3016   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3017   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3018 
3019   /* determine row ownership */
3020   /*---------------------------------------------------------*/
3021   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3022   if (m == PETSC_DECIDE) {
3023     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3024   } else {
3025     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3026   }
3027   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3028   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3029   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3030 
3031   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3032   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3033 
3034   /* determine the number of messages to send, their lengths */
3035   /*---------------------------------------------------------*/
3036   len_s  = merge->len_s;
3037 
3038   len = 0;  /* length of buf_si[] */
3039   merge->nsend = 0;
3040   for (proc=0; proc<size; proc++){
3041     len_si[proc] = 0;
3042     if (proc == rank){
3043       len_s[proc] = 0;
3044     } else {
3045       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3046       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
3047     }
3048     if (len_s[proc]) {
3049       merge->nsend++;
3050       nrows = 0;
3051       for (i=owners[proc]; i<owners[proc+1]; i++){
3052         if (ai[i+1] > ai[i]) nrows++;
3053       }
3054       len_si[proc] = 2*(nrows+1);
3055       len += len_si[proc];
3056     }
3057   }
3058 
3059   /* determine the number and length of messages to receive for ij-structure */
3060   /*-------------------------------------------------------------------------*/
3061   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3062   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3063 
3064   /* post the Irecv of j-structure */
3065   /*-------------------------------*/
3066   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3067   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3068 
3069   /* post the Isend of j-structure */
3070   /*--------------------------------*/
3071   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3072   sj_waits = si_waits + merge->nsend;
3073 
3074   for (proc=0, k=0; proc<size; proc++){
3075     if (!len_s[proc]) continue;
3076     i = owners[proc];
3077     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3078     k++;
3079   }
3080 
3081   /* receives and sends of j-structure are complete */
3082   /*------------------------------------------------*/
3083   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3084   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3085 
3086   /* send and recv i-structure */
3087   /*---------------------------*/
3088   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3089   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3090 
3091   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3092   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3093   for (proc=0,k=0; proc<size; proc++){
3094     if (!len_s[proc]) continue;
3095     /* form outgoing message for i-structure:
3096          buf_si[0]:                 nrows to be sent
3097                [1:nrows]:           row indices (local to the destination process)
3098                [nrows+1:2*nrows+1]: i-structure index
3099     */
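    /* A worked example of this layout with hypothetical data: if rows with local indices
       3 and 7 (local to [proc]) carrying 4 and 2 nonzeros respectively are to be sent,
       then nrows = 2 and
           buf_si = {2, 3, 7, 0, 4, 6}
       i.e. the row count, the two local row indices, and the running nonzero offsets. */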
3100     /*-------------------------------------------*/
3101     nrows = len_si[proc]/2 - 1;
3102     buf_si_i    = buf_si + nrows+1;
3103     buf_si[0]   = nrows;
3104     buf_si_i[0] = 0;
3105     nrows = 0;
3106     for (i=owners[proc]; i<owners[proc+1]; i++){
3107       anzi = ai[i+1] - ai[i];
3108       if (anzi) {
3109         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3110         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3111         nrows++;
3112       }
3113     }
3114     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3115     k++;
3116     buf_si += len_si[proc];
3117   }
3118 
3119   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3120   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3121 
3122   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3123   for (i=0; i<merge->nrecv; i++){
3124     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3125   }
3126 
3127   ierr = PetscFree(len_si);CHKERRQ(ierr);
3128   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3129   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3130   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3131   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3132   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3133   ierr = PetscFree(status);CHKERRQ(ierr);
3134 
3135   /* compute a local seq matrix in each processor */
3136   /*----------------------------------------------*/
3137   /* allocate bi array and free space for accumulating nonzero column info */
3138   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3139   bi[0] = 0;
3140 
3141   /* create and initialize a linked list */
3142   nlnk = N+1;
3143   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3144 
3145   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3147   len  = ai[owners[rank+1]] - ai[owners[rank]]; /* num of local nonzeros of seqmat */
3148   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3149   current_space = free_space;
3150 
3151   /* determine symbolic info for each local row */
3152   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3153   nextrow = buf_ri_k + merge->nrecv;
3154   nextai  = nextrow + merge->nrecv;
3155   for (k=0; k<merge->nrecv; k++){
3156     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
3157     nrows = *buf_ri_k[k];
3158     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
3159     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
3160   }
3161 
3162   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3163   len = 0;
3164   for (i=0;i<m;i++) {
3165     bnzi   = 0;
3166     /* add local non-zero cols of this proc's seqmat into lnk */
3167     arow   = owners[rank] + i;
3168     anzi   = ai[arow+1] - ai[arow];
3169     aj     = a->j + ai[arow];
3170     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3171     bnzi += nlnk;
3172     /* add received col data into lnk */
3173     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3174       if (i == *nextrow[k]) { /* i-th row */
3175         anzi = *(nextai[k]+1) - *nextai[k];
3176         aj   = buf_rj[k] + *nextai[k];
3177         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3178         bnzi += nlnk;
3179         nextrow[k]++; nextai[k]++;
3180       }
3181     }
3182     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3183 
3184     /* if free space is not available, make more free space */
3185     if (current_space->local_remaining<bnzi) {
3186       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3187       nspacedouble++;
3188     }
3189     /* copy data into free space, then initialize lnk */
3190     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3191     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3192 
3193     current_space->array           += bnzi;
3194     current_space->local_used      += bnzi;
3195     current_space->local_remaining -= bnzi;
3196 
3197     bi[i+1] = bi[i] + bnzi;
3198   }
3199 
3200   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3201 
3202   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3203   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3204   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3205 
3206   /* create symbolic parallel matrix B_mpi */
3207   /*---------------------------------------*/
3208   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3209   if (n==PETSC_DECIDE) {
3210     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3211   } else {
3212     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3213   }
3214   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3215   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3216   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3217 
3218   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3219   B_mpi->assembled     = PETSC_FALSE;
3220   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3221   merge->bi            = bi;
3222   merge->bj            = bj;
3223   merge->buf_ri        = buf_ri;
3224   merge->buf_rj        = buf_rj;
3225   merge->coi           = PETSC_NULL;
3226   merge->coj           = PETSC_NULL;
3227   merge->owners_co     = PETSC_NULL;
3228 
3229   /* attach the supporting struct to B_mpi for reuse */
3230   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3231   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3232   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3233   *mpimat = B_mpi;
3234 
3235   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3236   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3237   PetscFunctionReturn(0);
3238 }
3239 
3240 static PetscEvent logkey_seqstompi = 0;
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatMerge_SeqsToMPI"
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3244 {
3245   PetscErrorCode   ierr;
3246 
3247   PetscFunctionBegin;
3248   if (!logkey_seqstompi) {
3249     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3250   }
3251   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3252   if (scall == MAT_INITIAL_MATRIX){
3253     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3254   }
3255   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3256   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3257   PetscFunctionReturn(0);
3258 }
3259 static PetscEvent logkey_getlocalmat = 0;
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatGetLocalMat"
3262 /*@C
3263      MatGetLocalMat - Creates a SeqAIJ matrix containing all the local rows of A (diagonal and off-diagonal parts merged)
3264 
3265     Not Collective
3266 
3267    Input Parameters:
3268 +    A - the matrix
3269 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3270 
3271    Output Parameter:
3272 .    A_loc - the local sequential matrix generated
3273 
3274     Level: developer
3275 
3276 @*/
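/*
   A minimal usage sketch (the MPIAIJ matrix A is assumed to be already assembled):

     Mat A_loc;
     ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     // ... use A_loc: its rows are A's local rows, its columns keep A's global numbering ...
     ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);   // refresh values after A changes
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);

   MAT_REUSE_MATRIX only copies new numerical values and assumes the nonzero pattern of A
   has not changed since the MAT_INITIAL_MATRIX call.
*/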
3277 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3278 {
3279   PetscErrorCode  ierr;
3280   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3281   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3282   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3283   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3284   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3285   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3286 
3287   PetscFunctionBegin;
3288   if (!logkey_getlocalmat) {
3289     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3290   }
3291   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3292   if (scall == MAT_INITIAL_MATRIX){
3293     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3294     ci[0] = 0;
3295     for (i=0; i<am; i++){
3296       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3297     }
3298     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3299     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3300     k = 0;
3301     for (i=0; i<am; i++) {
3302       ncols_o = bi[i+1] - bi[i];
3303       ncols_d = ai[i+1] - ai[i];
3304       /* off-diagonal portion of A */
3305       for (jo=0; jo<ncols_o; jo++) {
3306         col = cmap[*bj];
3307         if (col >= cstart) break;
3308         cj[k]   = col; bj++;
3309         ca[k++] = *ba++;
3310       }
3311       /* diagonal portion of A */
3312       for (j=0; j<ncols_d; j++) {
3313         cj[k]   = cstart + *aj++;
3314         ca[k++] = *aa++;
3315       }
3316       /* off-diagonal portion of A */
3317       for (j=jo; j<ncols_o; j++) {
3318         cj[k]   = cmap[*bj++];
3319         ca[k++] = *ba++;
3320       }
3321     }
3322     /* put together the new matrix */
3323     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3324     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3325     /* Since these are PETSc arrays, change flags to free them as necessary. */
3326     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3327     mat->freedata = PETSC_TRUE;
3328     mat->nonew    = 0;
3329   } else if (scall == MAT_REUSE_MATRIX){
3330     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3331     ci = mat->i; cj = mat->j; ca = mat->a;
3332     for (i=0; i<am; i++) {
3333       /* off-diagonal portion of A */
3334       ncols_o = bi[i+1] - bi[i];
3335       for (jo=0; jo<ncols_o; jo++) {
3336         col = cmap[*bj];
3337         if (col >= cstart) break;
3338         *ca++ = *ba++; bj++;
3339       }
3340       /* diagonal portion of A */
3341       ncols_d = ai[i+1] - ai[i];
3342       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3343       /* off-diagonal portion of A */
3344       for (j=jo; j<ncols_o; j++) {
3345         *ca++ = *ba++; bj++;
3346       }
3347     }
3348   } else {
3349     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3350   }
3351 
3352   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3353   PetscFunctionReturn(0);
3354 }
3355 
3356 static PetscEvent logkey_getlocalmatcondensed = 0;
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatGetLocalMatCondensed"
3359 /*@C
3360      MatGetLocalMatCondensed - Creates a SeqAIJ matrix by taking all its local rows and NON-ZERO columns
3361      MatGetLocalMatCondensed - Creates a SeqAIJ matrix containing all the local rows of A, condensed to its NON-ZERO columns
3362     Not Collective
3363 
3364    Input Parameters:
3365 +    A - the matrix
3366 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3367 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3368 
3369    Output Parameter:
3370 .    A_loc - the local sequential matrix generated
3371 
3372     Level: developer
3373 
3374 @*/
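/*
   A minimal usage sketch, passing PETSC_NULL for row and col so that all local rows and
   all locally relevant columns are taken:

     Mat A_loc;
     ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
     // ... A_loc keeps A's locally owned columns plus the off-process columns that appear in the local rows ...
     ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/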
3375 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3376 {
3377   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3378   PetscErrorCode    ierr;
3379   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3380   IS                isrowa,iscola;
3381   Mat               *aloc;
3382 
3383   PetscFunctionBegin;
3384   if (!logkey_getlocalmatcondensed) {
3385     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3386   }
3387   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3388   if (!row){
3389     start = a->rstart; end = a->rend;
3390     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3391   } else {
3392     isrowa = *row;
3393   }
3394   if (!col){
3395     start = a->cstart;
3396     cmap  = a->garray;
3397     nzA   = a->A->n;
3398     nzB   = a->B->n;
3399     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3400     ncols = 0;
3401     for (i=0; i<nzB; i++) {
3402       if (cmap[i] < start) idx[ncols++] = cmap[i];
3403       else break;
3404     }
3405     imark = i;
3406     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3407     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3408     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3409     ierr = PetscFree(idx);CHKERRQ(ierr);
3410   } else {
3411     iscola = *col;
3412   }
3413   if (scall != MAT_INITIAL_MATRIX){
3414     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3415     aloc[0] = *A_loc;
3416   }
3417   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3418   *A_loc = aloc[0];
3419   ierr = PetscFree(aloc);CHKERRQ(ierr);
3420   if (!row){
3421     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3422   }
3423   if (!col){
3424     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3425   }
3426   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3427   PetscFunctionReturn(0);
3428 }
3429 
3430 static PetscEvent logkey_GetBrowsOfAcols = 0;
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatGetBrowsOfAcols"
3433 /*@C
3434     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
3435     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of local A
3436     Collective on Mat
3437 
3438    Input Parameters:
3439 +    A,B - the matrices in mpiaij format
3440 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3441 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3442 
3443    Output Parameter:
3444 +    rowb, colb - index sets of rows and columns of B to extract
3445 .    brstart - row index of B_seq at which the rows taken from B's own local rows begin
3446 -    B_seq - the sequential matrix generated
3447 
3448     Level: developer
3449 
3450 @*/
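/*
   A minimal usage sketch; saving the index sets makes a later MAT_REUSE_MATRIX call
   cheaper (the names are illustrative only):

     IS       rowb = PETSC_NULL,colb = PETSC_NULL;
     PetscInt brstart;
     Mat      B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     // ... use B_seq ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(rowb);CHKERRQ(ierr);
     ierr = ISDestroy(colb);CHKERRQ(ierr);
     ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/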
3451 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3452 {
3453   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3454   PetscErrorCode    ierr;
3455   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3456   IS                isrowb,iscolb;
3457   Mat               *bseq;
3458 
3459   PetscFunctionBegin;
3460   if (a->cstart != b->rstart || a->cend != b->rend){
3461     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3462   }
3463   if (!logkey_GetBrowsOfAcols) {
3464     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3465   }
3466   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3467 
3468   if (scall == MAT_INITIAL_MATRIX){
3469     start = a->cstart;
3470     cmap  = a->garray;
3471     nzA   = a->A->n;
3472     nzB   = a->B->n;
3473     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3474     ncols = 0;
3475     for (i=0; i<nzB; i++) {  /* row < local row index */
3476       if (cmap[i] < start) idx[ncols++] = cmap[i];
3477       else break;
3478     }
3479     imark = i;
3480     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3481     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3482     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3483     ierr = PetscFree(idx);CHKERRQ(ierr);
3484     *brstart = imark;
3485     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3486   } else {
3487     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3488     isrowb = *rowb; iscolb = *colb;
3489     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3490     bseq[0] = *B_seq;
3491   }
3492   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3493   *B_seq = bseq[0];
3494   ierr = PetscFree(bseq);CHKERRQ(ierr);
3495   if (!rowb){
3496     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3497   } else {
3498     *rowb = isrowb;
3499   }
3500   if (!colb){
3501     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3502   } else {
3503     *colb = iscolb;
3504   }
3505   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3506   PetscFunctionReturn(0);
3507 }
3508 
3509 static PetscEvent logkey_GetBrowsOfAocols = 0;
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatGetBrowsOfAoCols"
3512 /*@C
3513     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
3514     of the OFF-DIAGONAL portion of local A
3515 
3516     Collective on Mat
3517 
3518    Input Parameters:
3519 +    A,B - the matrices in mpiaij format
3520 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3521 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3522 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3523 
3524    Output Parameter:
3525 .    B_oth - the sequential matrix generated
3526 
3527     Level: developer
3528 
3529 @*/
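/*
   A minimal usage sketch; keeping startsj and bufa allows a later MAT_REUSE_MATRIX call,
   while passing PETSC_NULL for them forces MAT_INITIAL_MATRIX behaviour (names are
   illustrative only):

     PetscInt    *startsj = PETSC_NULL;
     PetscScalar *bufa    = PETSC_NULL;
     Mat         B_oth;
     ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     // ... use B_oth, e.g. in a parallel matrix-matrix product kernel ...
     ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree(startsj);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/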
3530 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3531 {
3532   VecScatter_MPI_General *gen_to,*gen_from;
3533   PetscErrorCode         ierr;
3534   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3535   Mat_SeqAIJ             *b_oth;
3536   VecScatter             ctx=a->Mvctx;
3537   MPI_Comm               comm=ctx->comm;
3538   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3539   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3540   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3541   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3542   MPI_Request            *rwaits,*swaits;
3543   MPI_Status             *sstatus,rstatus;
3544   PetscInt               *cols;
3545   PetscScalar            *vals;
3546   PetscMPIInt            j;
3547 
3548   PetscFunctionBegin;
3549   if (a->cstart != b->rstart || a->cend != b->rend){
3550     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3551   }
3552   if (!logkey_GetBrowsOfAocols) {
3553     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3554   }
3555   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3556   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3557 
3558   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3559   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3560   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
3561   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
3562   nrecvs   = gen_from->n;
3563   nsends   = gen_to->n;
3564   rwaits   = gen_from->requests;
3565   swaits   = gen_to->requests;
3566   srow     = gen_to->indices;   /* local row index to be sent */
3567   rstarts  = gen_from->starts;
3568   sstarts  = gen_to->starts;
3569   rprocs   = gen_from->procs;
3570   sprocs   = gen_to->procs;
3571   sstatus  = gen_to->sstatus;
3572 
3573   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3574   if (scall == MAT_INITIAL_MATRIX){
3575     /* i-array */
3576     /*---------*/
3577     /*  post receives */
3578     for (i=0; i<nrecvs; i++){
3579       rowlen = (PetscInt*)rvalues + rstarts[i];
3580       nrows = rstarts[i+1]-rstarts[i];
3581       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3582     }
3583 
3584     /* pack the outgoing message */
3585     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3586     rstartsj = sstartsj + nsends +1;
3587     sstartsj[0] = 0;  rstartsj[0] = 0;
3588     len = 0; /* total length of j or a array to be sent */
3589     k = 0;
3590     for (i=0; i<nsends; i++){
3591       rowlen = (PetscInt*)svalues + sstarts[i];
3592       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3593       for (j=0; j<nrows; j++) {
3594         row = srow[k] + b->rowners[rank]; /* global row idx */
3595         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3596         len += rowlen[j];
3597         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3598         k++;
3599       }
3600       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3601       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3602     }
3603     /* recvs and sends of i-array are completed */
3604     i = nrecvs;
3605     while (i--) {
3606       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3607     }
3608     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3609     /* allocate buffers for sending j and a arrays */
3610     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3611     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3612 
3613     /* create i-array of B_oth */
3614     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3615     b_othi[0] = 0;
3616     len = 0; /* total length of j or a array to be received */
3617     k = 0;
3618     for (i=0; i<nrecvs; i++){
3619       rowlen = (PetscInt*)rvalues + rstarts[i];
3620       nrows = rstarts[i+1]-rstarts[i];
3621       for (j=0; j<nrows; j++) {
3622         b_othi[k+1] = b_othi[k] + rowlen[j];
3623         len += rowlen[j]; k++;
3624       }
3625       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3626     }
3627 
3628     /* allocate space for j and a arrays of B_oth */
3629     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3630     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3631 
3632     /* j-array */
3633     /*---------*/
3634     /*  post receives of j-array */
3635     for (i=0; i<nrecvs; i++){
3636       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3637       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3638     }
3639     k = 0;
3640     for (i=0; i<nsends; i++){
3641       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3642       bufJ = bufj+sstartsj[i];
3643       for (j=0; j<nrows; j++) {
3644         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3645         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3646         for (l=0; l<ncols; l++){
3647           *bufJ++ = cols[l];
3648         }
3649         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3650       }
3651       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3652     }
3653 
3654     /* recvs and sends of j-array are completed */
3655     i = nrecvs;
3656     while (i--) {
3657       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3658     }
3659     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3660   } else if (scall == MAT_REUSE_MATRIX){
3661     sstartsj = *startsj;
3662     rstartsj = sstartsj + nsends +1;
3663     bufa     = *bufa_ptr;
3664     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3665     b_otha   = b_oth->a;
3666   } else {
3667     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3668   }
3669 
3670   /* a-array */
3671   /*---------*/
3672   /*  post receives of a-array */
3673   for (i=0; i<nrecvs; i++){
3674     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3675     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3676   }
3677   k = 0;
3678   for (i=0; i<nsends; i++){
3679     nrows = sstarts[i+1]-sstarts[i];
3680     bufA = bufa+sstartsj[i];
3681     for (j=0; j<nrows; j++) {
3682       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3683       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3684       for (l=0; l<ncols; l++){
3685         *bufA++ = vals[l];
3686       }
3687       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3688 
3689     }
3690     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3691   }
3692   /* recvs and sends of a-array are completed */
3693   i = nrecvs;
3694   while (i--) {
3695     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3696   }
3697   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3698 
3699   if (scall == MAT_INITIAL_MATRIX){
3700     /* put together the new matrix */
3701     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3702 
3703     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3704     /* Since these are PETSc arrays, change flags to free them as necessary. */
3705     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3706     b_oth->freedata = PETSC_TRUE;
3707     b_oth->nonew    = 0;
3708 
3709     ierr = PetscFree(bufj);CHKERRQ(ierr);
3710     if (!startsj || !bufa_ptr){
3711       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3712       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
3713     } else {
3714       *startsj  = sstartsj;
3715       *bufa_ptr = bufa;
3716     }
3717   }
3718   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3719 
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 /*MC
3724    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3725 
3726    Options Database Keys:
3727 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3728 
3729   Level: beginner
3730 
3731 .seealso: MatCreateMPIAIJ
3732 M*/
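/*
   A minimal sketch of creating an MPIAIJ matrix explicitly rather than through
   -mat_type mpiaij and MatSetFromOptions(); the preallocation numbers are only an example:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     // ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() as usual ...
*/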
3733 
3734 EXTERN_C_BEGIN
3735 #undef __FUNCT__
3736 #define __FUNCT__ "MatCreate_MPIAIJ"
3737 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3738 {
3739   Mat_MPIAIJ     *b;
3740   PetscErrorCode ierr;
3741   PetscInt       i;
3742   PetscMPIInt    size;
3743 
3744   PetscFunctionBegin;
3745   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3746 
3747   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3748   B->data         = (void*)b;
3749   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3750   B->factor       = 0;
3751   B->bs           = 1;
3752   B->assembled    = PETSC_FALSE;
3753   B->mapping      = 0;
3754 
3755   B->insertmode      = NOT_SET_VALUES;
3756   b->size            = size;
3757   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3758 
3759   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3760   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3761 
3762   /* the information in the maps duplicates the information computed below, eventually
3763      we should remove the duplicate information that is not contained in the maps */
3764   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3765   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3766 
3767   /* build local table of row and column ownerships */
3768   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3769   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3770   b->cowners = b->rowners + b->size + 2;
3771   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3772   b->rowners[0] = 0;
3773   for (i=2; i<=b->size; i++) {
3774     b->rowners[i] += b->rowners[i-1];
3775   }
3776   b->rstart = b->rowners[b->rank];
3777   b->rend   = b->rowners[b->rank+1];
3778   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3779   b->cowners[0] = 0;
3780   for (i=2; i<=b->size; i++) {
3781     b->cowners[i] += b->cowners[i-1];
3782   }
3783   b->cstart = b->cowners[b->rank];
3784   b->cend   = b->cowners[b->rank+1];
3785 
3786   /* build cache for off array entries formed */
3787   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3788   b->donotstash  = PETSC_FALSE;
3789   b->colmap      = 0;
3790   b->garray      = 0;
3791   b->roworiented = PETSC_TRUE;
3792 
3793   /* stuff used for matrix vector multiply */
3794   b->lvec      = PETSC_NULL;
3795   b->Mvctx     = PETSC_NULL;
3796 
3797   /* stuff for MatGetRow() */
3798   b->rowindices   = 0;
3799   b->rowvalues    = 0;
3800   b->getrowactive = PETSC_FALSE;
3801 
3802   /* Explicitly create 2 MATSEQAIJ matrices. */
3803   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3804   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3805   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3806   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3807   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3808   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3809   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3810   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3811 
3812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3813                                      "MatStoreValues_MPIAIJ",
3814                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3816                                      "MatRetrieveValues_MPIAIJ",
3817                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3819 				     "MatGetDiagonalBlock_MPIAIJ",
3820                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3822 				     "MatIsTranspose_MPIAIJ",
3823 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3825 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3826 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3828 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3829 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3831 				     "MatDiagonalScaleLocal_MPIAIJ",
3832 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 EXTERN_C_END
3836