xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision df48e8d96c19df54efdde2c76f874a16fcafd8fe)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/aij/mpi/mpiaij.h"
4 #include "src/inline/spops.h"
5 
6 /*
7   Local utility routine that creates a mapping from the global column
8 number to the local number in the off-diagonal part of the local
9 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable
10 (at a slightly higher hash-table lookup cost); without it each processor
11 holds an order-N integer array, which is not scalable but is fast to access.
12 */
13 #undef __FUNCT__
14 #define __FUNCT__ "CreateColmap_MPIAIJ_Private"
15 PetscErrorCode CreateColmap_MPIAIJ_Private(Mat mat)
16 {
17   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
18   PetscErrorCode ierr;
19   PetscInt       n = aij->B->n,i;
20 
21   PetscFunctionBegin;
22 #if defined (PETSC_USE_CTABLE)
23   ierr = PetscTableCreate(n,&aij->colmap);CHKERRQ(ierr);
24   for (i=0; i<n; i++){
25     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1);CHKERRQ(ierr);
26   }
27 #else
28   ierr = PetscMalloc((mat->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
29   ierr = PetscLogObjectMemory(mat,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
30   ierr = PetscMemzero(aij->colmap,mat->N*sizeof(PetscInt));CHKERRQ(ierr);
31   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
32 #endif
33   PetscFunctionReturn(0);
34 }
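
/*
   Illustrative sketch (not part of the source): once the colmap exists, a global
   column index gcol known to lie in the off-diagonal block is translated to its
   local column index lcol in B as follows (gcol and lcol are hypothetical names
   used only for this example):

     #if defined (PETSC_USE_CTABLE)
       ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
       lcol--;                          (the table stores indices shifted by one)
     #else
       lcol = aij->colmap[gcol] - 1;    (a stored 0 means the column is not present)
     #endif

   A result of -1 signals that gcol is not (yet) a column of the off-diagonal block;
   this is exactly the lookup performed in MatSetValues_MPIAIJ() below.
*/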
35 
36 
37 #define CHUNKSIZE   15
38 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
39 { \
40     if (lastcol1 > col) low1 = 0; else high1 = nrow1; \
41     lastcol1 = col;\
42     while (high1-low1 > 5) { \
43       t = (low1+high1)/2; \
44       if (rp1[t] > col) high1 = t; \
45       else             low1  = t; \
46     } \
47       for (_i=low1; _i<high1; _i++) { \
48         if (rp1[_i] > col) break; \
49         if (rp1[_i] == col) { \
50           if (addv == ADD_VALUES) ap1[_i] += value;   \
51           else                    ap1[_i] = value; \
52           goto a_noinsert; \
53         } \
54       }  \
55       if (value == 0.0 && ignorezeroentries) goto a_noinsert; \
56       if (nonew == 1) goto a_noinsert; \
57       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58       MatSeqXAIJReallocateAIJ(a,1,nrow1,row,col,rmax1,aa,ai,aj,am,rp1,ap1,aimax,nonew); \
59       N = nrow1++ - 1; a->nz++; \
60       /* shift up all the later entries in this row */ \
61       for (ii=N; ii>=_i; ii--) { \
62         rp1[ii+1] = rp1[ii]; \
63         ap1[ii+1] = ap1[ii]; \
64       } \
65       rp1[_i] = col;  \
66       ap1[_i] = value;  \
67       a_noinsert: ; \
68       ailen[row] = nrow1; \
69 }
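
/*
   Worked example of the insertion logic in the macro above (a sketch, assuming the
   entry is new and insertion is allowed): if the row currently holds columns
   rp1 = {2,5,9} and MatSetValues() supplies col = 7, the binary search plus the
   linear scan stop at _i = 2 because rp1[2] = 9 > 7; the entries from position _i
   onward are shifted up one slot and the row becomes rp1 = {2,5,7,9}, with ap1
   shifted identically and ap1[2] set to the new value, after which nrow1 and
   ailen[row] record the larger row length.
*/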
70 
71 
72 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
73 { \
74     if (lastcol2 > col) low2 = 0; else high2 = nrow2; \
75     lastcol2 = col;\
76     while (high2-low2 > 5) { \
77       t = (low2+high2)/2; \
78       if (rp2[t] > col) high2 = t; \
79       else             low2  = t; \
80     } \
81        for (_i=low2; _i<high2; _i++) { \
82         if (rp2[_i] > col) break; \
83         if (rp2[_i] == col) { \
84           if (addv == ADD_VALUES) ap2[_i] += value;   \
85           else                    ap2[_i] = value; \
86           goto b_noinsert; \
87         } \
88       }  \
89       if (value == 0.0 && ignorezeroentries) goto b_noinsert; \
90       if (nonew == 1) goto b_noinsert; \
91       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
92       MatSeqXAIJReallocateAIJ(b,1,nrow2,row,col,rmax2,ba,bi,bj,bm,rp2,ap2,bimax,nonew); \
93       N = nrow2++ - 1; b->nz++; \
94       /* shift up all the later entries in this row */ \
95       for (ii=N; ii>=_i; ii--) { \
96         rp2[ii+1] = rp2[ii]; \
97         ap2[ii+1] = ap2[ii]; \
98       } \
99       rp2[_i] = col;  \
100       ap2[_i] = value;  \
101       b_noinsert: ; \
102       bilen[row] = nrow2; \
103 }
104 
105 #undef __FUNCT__
106 #define __FUNCT__ "MatSetValues_MPIAIJ"
107 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
108 {
109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
110   PetscScalar    value;
111   PetscErrorCode ierr;
112   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
113   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
114   PetscTruth     roworiented = aij->roworiented;
115 
116   /* Some variables required by the MatSetValues_SeqAIJ_*_Private() macros */
117   Mat            A = aij->A;
118   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data;
119   PetscInt       *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
120   PetscScalar    *aa = a->a;
121   PetscTruth     ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES))?PETSC_TRUE:PETSC_FALSE);
122   Mat            B = aij->B;
123   Mat_SeqAIJ     *b = (Mat_SeqAIJ*)B->data;
124   PetscInt       *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->m,am = aij->A->m;
125   PetscScalar    *ba = b->a;
126 
127   PetscInt       *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
128   PetscInt       nonew = a->nonew;
129   PetscScalar    *ap1,*ap2;
130 
131   PetscFunctionBegin;
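  /*
     Overview of the loop below: entries for rows owned by this process
     ([rstart,rend)) are split by column; columns in [cstart,cend) go into the
     diagonal block aij->A and all other columns go into the off-diagonal block
     aij->B (translated to local indices through colmap/garray).  Entries for
     rows owned by other processes are stashed and exchanged later during
     MatAssemblyBegin/End().
  */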
132   for (i=0; i<m; i++) {
133     if (im[i] < 0) continue;
134 #if defined(PETSC_USE_DEBUG)
135     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
136 #endif
137     if (im[i] >= rstart && im[i] < rend) {
138       row      = im[i] - rstart;
139       lastcol1 = -1;
140       rp1      = aj + ai[row];
141       ap1      = aa + ai[row];
142       rmax1    = aimax[row];
143       nrow1    = ailen[row];
144       low1     = 0;
145       high1    = nrow1;
146       lastcol2 = -1;
147       rp2      = bj + bi[row];
148       ap2      = ba + bi[row];
149       rmax2    = bimax[row];
150       nrow2    = bilen[row];
151       low2     = 0;
152       high2    = nrow2;
153 
154       for (j=0; j<n; j++) {
155         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
156         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
157         if (in[j] >= cstart && in[j] < cend){
158           col = in[j] - cstart;
159           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
160         } else if (in[j] < 0) continue;
161 #if defined(PETSC_USE_DEBUG)
162         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
163 #endif
164         else {
165           if (mat->was_assembled) {
166             if (!aij->colmap) {
167               ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
168             }
169 #if defined (PETSC_USE_CTABLE)
170             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
171 	    col--;
172 #else
173             col = aij->colmap[in[j]] - 1;
174 #endif
175             if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
176               ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
177               col =  in[j];
178               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
179               B = aij->B;
180               b = (Mat_SeqAIJ*)B->data;
181               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
182               rp2      = bj + bi[row];
183               ap2      = ba + bi[row];
184               rmax2    = bimax[row];
185               nrow2    = bilen[row];
186               low2     = 0;
187               high2    = nrow2;
188               bm       = aij->B->m;
189               ba = b->a;
190             }
191           } else col = in[j];
192           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
193         }
194       }
195     } else {
196       if (!aij->donotstash) {
197         if (roworiented) {
198           if (ignorezeroentries && v[i*n] == 0.0) continue;
199           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
200         } else {
201           if (ignorezeroentries && v[i] == 0.0) continue;
202           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
203         }
204       }
205     }
206   }
207   PetscFunctionReturn(0);
208 }
209 
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatGetValues_MPIAIJ"
213 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
214 {
215   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
216   PetscErrorCode ierr;
217   PetscInt       i,j,rstart = aij->rstart,rend = aij->rend;
218   PetscInt       cstart = aij->cstart,cend = aij->cend,row,col;
219 
220   PetscFunctionBegin;
221   for (i=0; i<m; i++) {
222     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
223     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
224     if (idxm[i] >= rstart && idxm[i] < rend) {
225       row = idxm[i] - rstart;
226       for (j=0; j<n; j++) {
227         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
228         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
229         if (idxn[j] >= cstart && idxn[j] < cend){
230           col = idxn[j] - cstart;
231           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
232         } else {
233           if (!aij->colmap) {
234             ierr = CreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
235           }
236 #if defined (PETSC_USE_CTABLE)
237           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
238           col --;
239 #else
240           col = aij->colmap[idxn[j]] - 1;
241 #endif
242           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
243           else {
244             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
245           }
246         }
247       }
248     } else {
249       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
250     }
251   }
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
257 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
258 {
259   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       nstash,reallocs;
262   InsertMode     addv;
263 
264   PetscFunctionBegin;
265   if (aij->donotstash) {
266     PetscFunctionReturn(0);
267   }
268 
269   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
270   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
271   if (addv == (ADD_VALUES|INSERT_VALUES)) {
272     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
273   }
274   mat->insertmode = addv; /* in case this processor had no cache */
275 
276   ierr = MatStashScatterBegin_Private(&mat->stash,aij->rowners);CHKERRQ(ierr);
277   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
278   ierr = PetscLogInfo((aij->A,"MatAssemblyBegin_MPIAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
284 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
285 {
286   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
287   Mat_SeqAIJ     *a=(Mat_SeqAIJ *)aij->A->data,*b= (Mat_SeqAIJ *)aij->B->data;
288   PetscErrorCode ierr;
289   PetscMPIInt    n;
290   PetscInt       i,j,rstart,ncols,flg;
291   PetscInt       *row,*col,other_disassembled;
292   PetscScalar    *val;
293   InsertMode     addv = mat->insertmode;
294 
295   PetscFunctionBegin;
296   if (!aij->donotstash) {
297     while (1) {
298       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
299       if (!flg) break;
300 
301       for (i=0; i<n;) {
302         /* Now identify the consecutive vals belonging to the same row */
303         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
304         if (j < n) ncols = j-i;
305         else       ncols = n-i;
306         /* Now assemble all these values with a single function call */
307         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
308         i = j;
309       }
310     }
311     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
312   }
313   a->compressedrow.use     = PETSC_FALSE;
314   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
315   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
316 
317   /* determine if any processor has disassembled; if so we must
318      also disassemble ourselves, so that we may reassemble. */
319   /*
320      if the nonzero structure of submatrix B cannot change then we know that
321      no processor disassembled, so we can skip this check
322   */
323   if (!((Mat_SeqAIJ*)aij->B->data)->nonew)  {
324     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
325     if (mat->was_assembled && !other_disassembled) {
326       ierr = DisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
327     }
328   }
329   /* re-access b because aij->B may have been changed in MatSetValues() or DisAssemble() */
330   b    = (Mat_SeqAIJ *)aij->B->data;
331 
332   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
333     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
334   }
335   ierr = MatSetOption(aij->B,MAT_DO_NOT_USE_INODES);CHKERRQ(ierr);
336   b->compressedrow.use = PETSC_TRUE;
337   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
338   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
339 
340   if (aij->rowvalues) {
341     ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);
342     aij->rowvalues = 0;
343   }
344 
345   /* used by MatAXPY() */
346   a->xtoy = 0; b->xtoy = 0;
347   a->XtoY = 0; b->XtoY = 0;
348 
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
354 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
355 {
356   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
361   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
362   PetscFunctionReturn(0);
363 }
364 
365 #undef __FUNCT__
366 #define __FUNCT__ "MatZeroRows_MPIAIJ"
367 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,IS is,const PetscScalar *diag)
368 {
369   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
370   PetscErrorCode ierr;
371   PetscMPIInt    size = l->size,imdex,n,rank = l->rank,tag = A->tag,lastidx = -1;
372   PetscInt       i,N,*rows,*owners = l->rowners;
373   PetscInt       *nprocs,j,idx,nsends,row;
374   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
375   PetscInt       *rvalues,count,base,slen,*source;
376   PetscInt       *lens,*lrows,*values,rstart=l->rstart;
377   MPI_Comm       comm = A->comm;
378   MPI_Request    *send_waits,*recv_waits;
379   MPI_Status     recv_status,*send_status;
380   IS             istmp;
381 #if defined(PETSC_DEBUG)
382   PetscTruth     found = PETSC_FALSE;
383 #endif
384 
385   PetscFunctionBegin;
386   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
387   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
388 
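  /*
     Communication pattern used below (a summary, not extra functionality):
     (1) count how many of the requested global rows belong to each process,
     (2) exchange the counts and the maximum message length with PetscMaxSum(),
     (3) post the receives and send each process the row indices it owns,
     (4) gather the received indices into lrows (shifted to local numbering)
         and zero those local rows.
  */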
389   /*  first count number of contributors to each processor */
390   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
391   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
392   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
393   j = 0;
394   for (i=0; i<N; i++) {
395     if (lastidx > (idx = rows[i])) j = 0;
396     lastidx = idx;
397     for (; j<size; j++) {
398       if (idx >= owners[j] && idx < owners[j+1]) {
399         nprocs[2*j]++;
400         nprocs[2*j+1] = 1;
401         owner[i] = j;
402 #if defined(PETSC_DEBUG)
403         found = PETSC_TRUE;
404 #endif
405         break;
406       }
407     }
408 #if defined(PETSC_DEBUG)
409     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
410     found = PETSC_FALSE;
411 #endif
412   }
413   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
414 
415   /* inform other processors of number of messages and max length*/
416   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
417 
418   /* post receives:   */
419   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
420   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
421   for (i=0; i<nrecvs; i++) {
422     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
423   }
424 
425   /* do sends:
426       1) starts[i] gives the starting index in svalues for stuff going to
427          the ith processor
428   */
429   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
430   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
431   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
432   starts[0] = 0;
433   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
434   for (i=0; i<N; i++) {
435     svalues[starts[owner[i]]++] = rows[i];
436   }
437   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
438 
439   starts[0] = 0;
440   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
441   count = 0;
442   for (i=0; i<size; i++) {
443     if (nprocs[2*i+1]) {
444       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
445     }
446   }
447   ierr = PetscFree(starts);CHKERRQ(ierr);
448 
449   base = owners[rank];
450 
451   /*  wait on receives */
452   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
453   source = lens + nrecvs;
454   count  = nrecvs; slen = 0;
455   while (count) {
456     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
457     /* unpack receives into our local space */
458     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
459     source[imdex]  = recv_status.MPI_SOURCE;
460     lens[imdex]    = n;
461     slen          += n;
462     count--;
463   }
464   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
465 
466   /* move the data into the send scatter */
467   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
468   count = 0;
469   for (i=0; i<nrecvs; i++) {
470     values = rvalues + i*nmax;
471     for (j=0; j<lens[i]; j++) {
472       lrows[count++] = values[j] - base;
473     }
474   }
475   ierr = PetscFree(rvalues);CHKERRQ(ierr);
476   ierr = PetscFree(lens);CHKERRQ(ierr);
477   ierr = PetscFree(owner);CHKERRQ(ierr);
478   ierr = PetscFree(nprocs);CHKERRQ(ierr);
479 
480   /* actually zap the local rows */
481   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
482   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
483 
484   /*
485         Zero the required rows. If the "diagonal block" of the matrix
486      is square and the user wishes to set the diagonal we use separate
487      code so that MatSetValues() is not called once for each diagonal entry,
488      which would allocate new memory and perform many mallocs, slowing things down.
489 
490        Contributed by: Matthew Knepley
491   */
492   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
493   ierr = MatZeroRows(l->B,istmp,0);CHKERRQ(ierr);
494   if (diag && (l->A->M == l->A->N)) {
495     ierr      = MatZeroRows(l->A,istmp,diag);CHKERRQ(ierr);
496   } else if (diag) {
497     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
498     if (((Mat_SeqAIJ*)l->A->data)->nonew) {
499       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\n\
500 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
501     }
502     for (i = 0; i < slen; i++) {
503       row  = lrows[i] + rstart;
504       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
505     }
506     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
507     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
508   } else {
509     ierr = MatZeroRows(l->A,istmp,0);CHKERRQ(ierr);
510   }
511   ierr = ISDestroy(istmp);CHKERRQ(ierr);
512   ierr = PetscFree(lrows);CHKERRQ(ierr);
513 
514   /* wait on sends */
515   if (nsends) {
516     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
517     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
518     ierr = PetscFree(send_status);CHKERRQ(ierr);
519   }
520   ierr = PetscFree(send_waits);CHKERRQ(ierr);
521   ierr = PetscFree(svalues);CHKERRQ(ierr);
522 
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatMult_MPIAIJ"
528 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
529 {
530   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
531   PetscErrorCode ierr;
532   PetscInt       nt;
533 
534   PetscFunctionBegin;
535   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
536   if (nt != A->n) {
537     SETERRQ2(PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->n,nt);
538   }
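  /* y = A_d*x_local + B*x_ghost: the scatter that gathers the ghost values of xx
     into a->lvec is overlapped with the product against the diagonal block a->A */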
539   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
540   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
541   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
542   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatMultAdd_MPIAIJ"
548 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
549 {
550   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
551   PetscErrorCode ierr;
552 
553   PetscFunctionBegin;
554   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
555   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
556   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
557   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
563 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
564 {
565   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
566   PetscErrorCode ierr;
567   PetscTruth     merged;
568 
569   PetscFunctionBegin;
570   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
571   /* do nondiagonal part */
572   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
573   if (!merged) {
574     /* send it on its way */
575     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
576     /* do local part */
577     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
578     /* receive remote parts: note this assumes the values are not actually */
579     /* added into yy until the next line */
580     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
581   } else {
582     /* do local part */
583     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
584     /* send it on its way */
585     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
586     /* values actually were received in the Begin() but we need to call this nop */
587     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
588   }
589   PetscFunctionReturn(0);
590 }
591 
592 EXTERN_C_BEGIN
593 #undef __FUNCT__
594 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
595 PetscErrorCode PETSCMAT_DLLEXPORT MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscTruth tol,PetscTruth *f)
596 {
597   MPI_Comm       comm;
598   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *) Amat->data, *Bij;
599   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
600   IS             Me,Notme;
601   PetscErrorCode ierr;
602   PetscInt       M,N,first,last,*notme,i;
603   PetscMPIInt    size;
604 
605   PetscFunctionBegin;
606 
607   /* Easy test: symmetric diagonal block */
608   Bij = (Mat_MPIAIJ *) Bmat->data; Bdia = Bij->A;
609   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
610   if (!*f) PetscFunctionReturn(0);
611   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
612   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
613   if (size == 1) PetscFunctionReturn(0);
614 
615   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
616   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
617   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
618   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
619   for (i=0; i<first; i++) notme[i] = i;
620   for (i=last; i<M; i++) notme[i-last+first] = i;
621   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,&Notme);CHKERRQ(ierr);
622   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
623   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
624   Aoff = Aoffs[0];
625   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
626   Boff = Boffs[0];
627   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
628   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
629   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
630   ierr = ISDestroy(Me);CHKERRQ(ierr);
631   ierr = ISDestroy(Notme);CHKERRQ(ierr);
632 
633   PetscFunctionReturn(0);
634 }
635 EXTERN_C_END
636 
637 #undef __FUNCT__
638 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
639 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
640 {
641   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
642   PetscErrorCode ierr;
643 
644   PetscFunctionBegin;
645   /* do nondiagonal part */
646   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
647   /* send it on its way */
648   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
649   /* do local part */
650   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
651   /* receive remote parts */
652   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 /*
657   This only works correctly for square matrices where the subblock A->A is the
658    diagonal block
659 */
660 #undef __FUNCT__
661 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
662 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
663 {
664   PetscErrorCode ierr;
665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
666 
667   PetscFunctionBegin;
668   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
669   if (a->rstart != a->cstart || a->rend != a->cend) {
670     SETERRQ(PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
671   }
672   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatScale_MPIAIJ"
678 PetscErrorCode MatScale_MPIAIJ(const PetscScalar aa[],Mat A)
679 {
680   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
681   PetscErrorCode ierr;
682 
683   PetscFunctionBegin;
684   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
685   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
686   PetscFunctionReturn(0);
687 }
688 
689 #undef __FUNCT__
690 #define __FUNCT__ "MatDestroy_MPIAIJ"
691 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
692 {
693   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
694   PetscErrorCode ierr;
695 
696   PetscFunctionBegin;
697 #if defined(PETSC_USE_LOG)
698   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->M,mat->N);
699 #endif
700   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
701   ierr = PetscFree(aij->rowners);CHKERRQ(ierr);
702   ierr = MatDestroy(aij->A);CHKERRQ(ierr);
703   ierr = MatDestroy(aij->B);CHKERRQ(ierr);
704 #if defined (PETSC_USE_CTABLE)
705   if (aij->colmap) {ierr = PetscTableDelete(aij->colmap);CHKERRQ(ierr);}
706 #else
707   if (aij->colmap) {ierr = PetscFree(aij->colmap);CHKERRQ(ierr);}
708 #endif
709   if (aij->garray) {ierr = PetscFree(aij->garray);CHKERRQ(ierr);}
710   if (aij->lvec)   {ierr = VecDestroy(aij->lvec);CHKERRQ(ierr);}
711   if (aij->Mvctx)  {ierr = VecScatterDestroy(aij->Mvctx);CHKERRQ(ierr);}
712   if (aij->rowvalues) {ierr = PetscFree(aij->rowvalues);CHKERRQ(ierr);}
713   ierr = PetscFree(aij);CHKERRQ(ierr);
714 
715   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
716   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
717   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
718   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
719   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
720   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
721   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPIAIJ_Binary"
727 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
728 {
729   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
730   Mat_SeqAIJ*       A = (Mat_SeqAIJ*)aij->A->data;
731   Mat_SeqAIJ*       B = (Mat_SeqAIJ*)aij->B->data;
732   PetscErrorCode    ierr;
733   PetscMPIInt       rank,size,tag = ((PetscObject)viewer)->tag;
734   int               fd;
735   PetscInt          nz,header[4],*row_lengths,*range,rlen,i;
736   PetscInt          nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = aij->cstart,rnz;
737   PetscScalar       *column_values;
738 
739   PetscFunctionBegin;
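  /*
     Binary layout written below: a 4-entry header {MAT_FILE_COOKIE, M, N, global nz},
     followed by all row lengths (PETSC_INT), all column indices (PETSC_INT) and all
     values (PETSC_SCALAR).  Within each row the off-diagonal columns below cstart are
     written first, then the diagonal block (shifted by cstart), then the remaining
     off-diagonal columns, so each row appears in increasing global column order.
  */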
740   ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
741   ierr = MPI_Comm_size(mat->comm,&size);CHKERRQ(ierr);
742   nz   = A->nz + B->nz;
743   if (!rank) {
744     header[0] = MAT_FILE_COOKIE;
745     header[1] = mat->M;
746     header[2] = mat->N;
747     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
748     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
749     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
750     /* get largest number of rows any processor has */
751     rlen = mat->m;
752     ierr = PetscMapGetGlobalRange(mat->rmap,&range);CHKERRQ(ierr);
753     for (i=1; i<size; i++) {
754       rlen = PetscMax(rlen,range[i+1] - range[i]);
755     }
756   } else {
757     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,mat->comm);CHKERRQ(ierr);
758     rlen = mat->m;
759   }
760 
761   /* load up the local row counts */
762   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
763   for (i=0; i<mat->m; i++) {
764     row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
765   }
766 
767   /* store the row lengths to the file */
768   if (!rank) {
769     MPI_Status status;
770     ierr = PetscBinaryWrite(fd,row_lengths,mat->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
771     for (i=1; i<size; i++) {
772       rlen = range[i+1] - range[i];
773       ierr = MPI_Recv(row_lengths,rlen,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
774       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
775     }
776   } else {
777     ierr = MPI_Send(row_lengths,mat->m,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
778   }
779   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
780 
781   /* load up the local column indices */
782   nzmax = nz; /* the 0th processor needs as much space as the largest amount any processor needs */
783   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,mat->comm);CHKERRQ(ierr);
784   ierr = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
785   cnt  = 0;
786   for (i=0; i<mat->m; i++) {
787     for (j=B->i[i]; j<B->i[i+1]; j++) {
788       if ( (col = garray[B->j[j]]) > cstart) break;
789       column_indices[cnt++] = col;
790     }
791     for (k=A->i[i]; k<A->i[i+1]; k++) {
792       column_indices[cnt++] = A->j[k] + cstart;
793     }
794     for (; j<B->i[i+1]; j++) {
795       column_indices[cnt++] = garray[B->j[j]];
796     }
797   }
798   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
799 
800   /* store the column indices to the file */
801   if (!rank) {
802     MPI_Status status;
803     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
804     for (i=1; i<size; i++) {
805       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
806       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
807       ierr = MPI_Recv(column_indices,rnz,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
808       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
809     }
810   } else {
811     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
812     ierr = MPI_Send(column_indices,nz,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
813   }
814   ierr = PetscFree(column_indices);CHKERRQ(ierr);
815 
816   /* load up the local column values */
817   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
818   cnt  = 0;
819   for (i=0; i<mat->m; i++) {
820     for (j=B->i[i]; j<B->i[i+1]; j++) {
821       if ( garray[B->j[j]] > cstart) break;
822       column_values[cnt++] = B->a[j];
823     }
824     for (k=A->i[i]; k<A->i[i+1]; k++) {
825       column_values[cnt++] = A->a[k];
826     }
827     for (; j<B->i[i+1]; j++) {
828       column_values[cnt++] = B->a[j];
829     }
830   }
831   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
832 
833   /* store the column values to the file */
834   if (!rank) {
835     MPI_Status status;
836     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
837     for (i=1; i<size; i++) {
838       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,mat->comm,&status);CHKERRQ(ierr);
839       if (rnz > nzmax) SETERRQ2(PETSC_ERR_LIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
840       ierr = MPI_Recv(column_values,rnz,MPIU_SCALAR,i,tag,mat->comm,&status);CHKERRQ(ierr);
841       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
842     }
843   } else {
844     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,mat->comm);CHKERRQ(ierr);
845     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,mat->comm);CHKERRQ(ierr);
846   }
847   ierr = PetscFree(column_values);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
853 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
854 {
855   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
856   PetscErrorCode    ierr;
857   PetscMPIInt       rank = aij->rank,size = aij->size;
858   PetscTruth        isdraw,iascii,isbinary;
859   PetscViewer       sviewer;
860   PetscViewerFormat format;
861 
862   PetscFunctionBegin;
863   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
864   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
865   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
866   if (iascii) {
867     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
868     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
869       MatInfo    info;
870       PetscTruth inodes;
871 
872       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
873       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
874       ierr = MatInodeGetInodeSizes(aij->A,PETSC_NULL,(PetscInt **)&inodes,PETSC_NULL);CHKERRQ(ierr);
875       if (!inodes) {
876         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
877 					      rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
878       } else {
879         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
880 		    rank,mat->m,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
881       }
882       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
883       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
884       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
885       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
886       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
887       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
888       PetscFunctionReturn(0);
889     } else if (format == PETSC_VIEWER_ASCII_INFO) {
890       PetscInt   inodecount,inodelimit,*inodes;
891       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
892       if (inodes) {
893         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
894       } else {
895         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
896       }
897       PetscFunctionReturn(0);
898     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
899       PetscFunctionReturn(0);
900     }
901   } else if (isbinary) {
902     if (size == 1) {
903       ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
904       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
905     } else {
906       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
907     }
908     PetscFunctionReturn(0);
909   } else if (isdraw) {
910     PetscDraw  draw;
911     PetscTruth isnull;
912     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
913     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
914   }
915 
916   if (size == 1) {
917     ierr = PetscObjectSetName((PetscObject)aij->A,mat->name);CHKERRQ(ierr);
918     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
919   } else {
920     /* assemble the entire matrix onto the first processor */
921     Mat         A;
922     Mat_SeqAIJ  *Aloc;
923     PetscInt    M = mat->M,N = mat->N,m,*ai,*aj,row,*cols,i,*ct;
924     PetscScalar *a;
925 
926     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
927     if (!rank) {
928       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
929     } else {
930       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
931     }
932     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
933     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
934     ierr = MatMPIAIJSetPreallocation(A,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
935     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
936 
937     /* copy over the A part */
938     Aloc = (Mat_SeqAIJ*)aij->A->data;
939     m = aij->A->m; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
940     row = aij->rstart;
941     for (i=0; i<ai[m]; i++) {aj[i] += aij->cstart ;}
942     for (i=0; i<m; i++) {
943       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
944       row++; a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
945     }
946     aj = Aloc->j;
947     for (i=0; i<ai[m]; i++) {aj[i] -= aij->cstart;}
948 
949     /* copy over the B part */
950     Aloc = (Mat_SeqAIJ*)aij->B->data;
951     m    = aij->B->m;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
952     row  = aij->rstart;
953     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
954     ct   = cols;
955     for (i=0; i<ai[m]; i++) {cols[i] = aij->garray[aj[i]];}
956     for (i=0; i<m; i++) {
957       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
958       row++; a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
959     }
960     ierr = PetscFree(ct);CHKERRQ(ierr);
961     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
962     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
963     /*
964        Everyone has to participate in this call to draw the matrix since the graphics waits are
965        synchronized across all processors that share the PetscDraw object
966     */
967     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
968     if (!rank) {
969       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
970       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
971     }
972     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
973     ierr = MatDestroy(A);CHKERRQ(ierr);
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatView_MPIAIJ"
980 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
981 {
982   PetscErrorCode ierr;
983   PetscTruth     iascii,isdraw,issocket,isbinary;
984 
985   PetscFunctionBegin;
986   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
987   ierr  = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
988   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
989   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
990   if (iascii || isdraw || isbinary || issocket) {
991     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
992   } else {
993     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIAIJ matrices",((PetscObject)viewer)->type_name);
994   }
995   PetscFunctionReturn(0);
996 }
997 
998 
999 
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatRelax_MPIAIJ"
1002 PetscErrorCode MatRelax_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1003 {
1004   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1005   PetscErrorCode ierr;
1006   Vec            bb1;
1007   PetscScalar    mone=-1.0;
1008 
1009   PetscFunctionBegin;
1010   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
1011 
1012   ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1013 
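  /*
     Each outer iteration below freezes the off-process coupling at the most recently
     scattered ghost values and performs a local sweep, i.e. roughly
        x <- SOR(A_d, bb1),   bb1 = bb - B*x_ghost
     which is block Jacobi between processes and SOR within each process.
  */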
1014   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
1015     if (flag & SOR_ZERO_INITIAL_GUESS) {
1016       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
1017       its--;
1018     }
1019 
1020     while (its--) {
1021       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1022       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1023 
1024       /* update rhs: bb1 = bb - B*x */
1025       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1026       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1027 
1028       /* local sweep */
1029       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);
1030       CHKERRQ(ierr);
1031     }
1032   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
1033     if (flag & SOR_ZERO_INITIAL_GUESS) {
1034       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1035       its--;
1036     }
1037     while (its--) {
1038       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1039       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1040 
1041       /* update rhs: bb1 = bb - B*x */
1042       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1043       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1044 
1045       /* local sweep */
1046       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1047       CHKERRQ(ierr);
1048     }
1049   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
1050     if (flag & SOR_ZERO_INITIAL_GUESS) {
1051       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
1052       its--;
1053     }
1054     while (its--) {
1055       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1056       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
1057 
1058       /* update rhs: bb1 = bb - B*x */
1059       ierr = VecScale(mat->lvec,mone);CHKERRQ(ierr);
1060       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1061 
1062       /* local sweep */
1063       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,PETSC_NULL,xx);
1064       CHKERRQ(ierr);
1065     }
1066   } else {
1067     SETERRQ(PETSC_ERR_SUP,"Parallel SOR not supported");
1068   }
1069 
1070   ierr = VecDestroy(bb1);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 #undef __FUNCT__
1075 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1076 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1077 {
1078   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1079   Mat            A = mat->A,B = mat->B;
1080   PetscErrorCode ierr;
1081   PetscReal      isend[5],irecv[5];
1082 
1083   PetscFunctionBegin;
1084   info->block_size     = 1.0;
1085   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1086   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1087   isend[3] = info->memory;  isend[4] = info->mallocs;
1088   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1090   isend[3] += info->memory;  isend[4] += info->mallocs;
1091   if (flag == MAT_LOCAL) {
1092     info->nz_used      = isend[0];
1093     info->nz_allocated = isend[1];
1094     info->nz_unneeded  = isend[2];
1095     info->memory       = isend[3];
1096     info->mallocs      = isend[4];
1097   } else if (flag == MAT_GLOBAL_MAX) {
1098     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1099     info->nz_used      = irecv[0];
1100     info->nz_allocated = irecv[1];
1101     info->nz_unneeded  = irecv[2];
1102     info->memory       = irecv[3];
1103     info->mallocs      = irecv[4];
1104   } else if (flag == MAT_GLOBAL_SUM) {
1105     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1106     info->nz_used      = irecv[0];
1107     info->nz_allocated = irecv[1];
1108     info->nz_unneeded  = irecv[2];
1109     info->memory       = irecv[3];
1110     info->mallocs      = irecv[4];
1111   }
1112   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1113   info->fill_ratio_needed = 0;
1114   info->factor_mallocs    = 0;
1115   info->rows_global       = (double)matin->M;
1116   info->columns_global    = (double)matin->N;
1117   info->rows_local        = (double)matin->m;
1118   info->columns_local     = (double)matin->N;
1119 
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatSetOption_MPIAIJ"
1125 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op)
1126 {
1127   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1128   PetscErrorCode ierr;
1129 
1130   PetscFunctionBegin;
1131   switch (op) {
1132   case MAT_NO_NEW_NONZERO_LOCATIONS:
1133   case MAT_YES_NEW_NONZERO_LOCATIONS:
1134   case MAT_COLUMNS_UNSORTED:
1135   case MAT_COLUMNS_SORTED:
1136   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1137   case MAT_KEEP_ZEROED_ROWS:
1138   case MAT_NEW_NONZERO_LOCATION_ERR:
1139   case MAT_USE_INODES:
1140   case MAT_DO_NOT_USE_INODES:
1141   case MAT_IGNORE_ZERO_ENTRIES:
1142     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1143     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1144     break;
1145   case MAT_ROW_ORIENTED:
1146     a->roworiented = PETSC_TRUE;
1147     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1148     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1149     break;
1150   case MAT_ROWS_SORTED:
1151   case MAT_ROWS_UNSORTED:
1152   case MAT_YES_NEW_DIAGONALS:
1153     ierr = PetscLogInfo((A,"MatSetOption_MPIAIJ:Option ignored\n"));CHKERRQ(ierr);
1154     break;
1155   case MAT_COLUMN_ORIENTED:
1156     a->roworiented = PETSC_FALSE;
1157     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1158     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1159     break;
1160   case MAT_IGNORE_OFF_PROC_ENTRIES:
1161     a->donotstash = PETSC_TRUE;
1162     break;
1163   case MAT_NO_NEW_DIAGONALS:
1164     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1165   case MAT_SYMMETRIC:
1166   case MAT_STRUCTURALLY_SYMMETRIC:
1167   case MAT_HERMITIAN:
1168   case MAT_SYMMETRY_ETERNAL:
1169     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1170     break;
1171   case MAT_NOT_SYMMETRIC:
1172   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1173   case MAT_NOT_HERMITIAN:
1174   case MAT_NOT_SYMMETRY_ETERNAL:
1175     break;
1176   default:
1177     SETERRQ(PETSC_ERR_SUP,"unknown option");
1178   }
1179   PetscFunctionReturn(0);
1180 }
1181 
1182 #undef __FUNCT__
1183 #define __FUNCT__ "MatGetRow_MPIAIJ"
1184 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1185 {
1186   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1187   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1188   PetscErrorCode ierr;
1189   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = mat->cstart;
1190   PetscInt       nztot,nzA,nzB,lrow,rstart = mat->rstart,rend = mat->rend;
1191   PetscInt       *cmap,*idx_p;
1192 
1193   PetscFunctionBegin;
1194   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1195   mat->getrowactive = PETSC_TRUE;
1196 
1197   if (!mat->rowvalues && (idx || v)) {
1198     /*
1199         allocate enough space to hold information from the longest row.
1200     */
1201     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1202     PetscInt     max = 1,tmp;
1203     for (i=0; i<matin->m; i++) {
1204       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1205       if (max < tmp) { max = tmp; }
1206     }
1207     ierr = PetscMalloc(max*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1208     mat->rowindices = (PetscInt*)(mat->rowvalues + max);
1209   }
1210 
1211   if (row < rstart || row >= rend) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Only local rows")
1212   lrow = row - rstart;
1213 
1214   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1215   if (!v)   {pvA = 0; pvB = 0;}
1216   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1217   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1218   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1219   nztot = nzA + nzB;
1220 
1221   cmap  = mat->garray;
1222   if (v  || idx) {
1223     if (nztot) {
1224       /* Sort by increasing column numbers, assuming A and B already sorted */
1225       PetscInt imark = -1;
1226       if (v) {
1227         *v = v_p = mat->rowvalues;
1228         for (i=0; i<nzB; i++) {
1229           if (cmap[cworkB[i]] < cstart)   v_p[i] = vworkB[i];
1230           else break;
1231         }
1232         imark = i;
1233         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1234         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1235       }
1236       if (idx) {
1237         *idx = idx_p = mat->rowindices;
1238         if (imark > -1) {
1239           for (i=0; i<imark; i++) {
1240             idx_p[i] = cmap[cworkB[i]];
1241           }
1242         } else {
1243           for (i=0; i<nzB; i++) {
1244             if (cmap[cworkB[i]] < cstart)   idx_p[i] = cmap[cworkB[i]];
1245             else break;
1246           }
1247           imark = i;
1248         }
1249         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1250         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1251       }
1252     } else {
1253       if (idx) *idx = 0;
1254       if (v)   *v   = 0;
1255     }
1256   }
1257   *nz = nztot;
1258   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1259   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1265 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1266 {
1267   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1268 
1269   PetscFunctionBegin;
1270   if (!aij->getrowactive) {
1271     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1272   }
1273   aij->getrowactive = PETSC_FALSE;
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatNorm_MPIAIJ"
1279 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1280 {
1281   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1282   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1283   PetscErrorCode ierr;
1284   PetscInt       i,j,cstart = aij->cstart;
1285   PetscReal      sum = 0.0;
1286   PetscScalar    *v;
1287 
1288   PetscFunctionBegin;
1289   if (aij->size == 1) {
1290     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1291   } else {
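    /*
       The parallel norms computed below: NORM_FROBENIUS sums |a_ij|^2 over both local
       blocks and takes the square root of the global sum; NORM_1 accumulates absolute
       column sums (off-diagonal columns mapped through garray) and takes the global
       maximum; NORM_INFINITY accumulates absolute row sums and takes the global maximum.
    */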
1292     if (type == NORM_FROBENIUS) {
1293       v = amat->a;
1294       for (i=0; i<amat->nz; i++) {
1295 #if defined(PETSC_USE_COMPLEX)
1296         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1297 #else
1298         sum += (*v)*(*v); v++;
1299 #endif
1300       }
1301       v = bmat->a;
1302       for (i=0; i<bmat->nz; i++) {
1303 #if defined(PETSC_USE_COMPLEX)
1304         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1305 #else
1306         sum += (*v)*(*v); v++;
1307 #endif
1308       }
1309       ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1310       *norm = sqrt(*norm);
1311     } else if (type == NORM_1) { /* max column norm */
1312       PetscReal *tmp,*tmp2;
1313       PetscInt    *jj,*garray = aij->garray;
1314       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
1315       ierr = PetscMalloc((mat->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
1316       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
1317       *norm = 0.0;
1318       v = amat->a; jj = amat->j;
1319       for (j=0; j<amat->nz; j++) {
1320         tmp[cstart + *jj++ ] += PetscAbsScalar(*v);  v++;
1321       }
1322       v = bmat->a; jj = bmat->j;
1323       for (j=0; j<bmat->nz; j++) {
1324         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1325       }
1326       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
1327       for (j=0; j<mat->N; j++) {
1328         if (tmp2[j] > *norm) *norm = tmp2[j];
1329       }
1330       ierr = PetscFree(tmp);CHKERRQ(ierr);
1331       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1332     } else if (type == NORM_INFINITY) { /* max row norm */
1333       PetscReal ntemp = 0.0;
1334       for (j=0; j<aij->A->m; j++) {
1335         v = amat->a + amat->i[j];
1336         sum = 0.0;
1337         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1338           sum += PetscAbsScalar(*v); v++;
1339         }
1340         v = bmat->a + bmat->i[j];
1341         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1342           sum += PetscAbsScalar(*v); v++;
1343         }
1344         if (sum > ntemp) ntemp = sum;
1345       }
1346       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
1347     } else {
1348       SETERRQ(PETSC_ERR_SUP,"No support for two norm");
1349     }
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatTranspose_MPIAIJ"
1356 PetscErrorCode MatTranspose_MPIAIJ(Mat A,Mat *matout)
1357 {
1358   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1359   Mat_SeqAIJ     *Aloc = (Mat_SeqAIJ*)a->A->data;
1360   PetscErrorCode ierr;
1361   PetscInt       M = A->M,N = A->N,m,*ai,*aj,row,*cols,i,*ct;
1362   Mat            B;
1363   PetscScalar    *array;
1364 
1365   PetscFunctionBegin;
1366   if (!matout && M != N) {
1367     SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1368   }
1369 
1370   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1371   ierr = MatSetSizes(B,A->n,A->m,N,M);CHKERRQ(ierr);
1372   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1373   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1374 
1375   /* copy over the A part */
1376   Aloc = (Mat_SeqAIJ*)a->A->data;
1377   m = a->A->m; ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1378   row = a->rstart;
1379   for (i=0; i<ai[m]; i++) {aj[i] += a->cstart ;}
1380   for (i=0; i<m; i++) {
1381     ierr = MatSetValues(B,ai[i+1]-ai[i],aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1382     row++; array += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1383   }
1384   aj = Aloc->j;
1385   for (i=0; i<ai[m]; i++) {aj[i] -= a->cstart ;}
1386 
1387   /* copy over the B part */
1388   Aloc = (Mat_SeqAIJ*)a->B->data;
1389   m = a->B->m;  ai = Aloc->i; aj = Aloc->j; array = Aloc->a;
1390   row  = a->rstart;
1391   ierr = PetscMalloc((1+ai[m])*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1392   ct   = cols;
1393   for (i=0; i<ai[m]; i++) {cols[i] = a->garray[aj[i]];}
1394   for (i=0; i<m; i++) {
1395     ierr = MatSetValues(B,ai[i+1]-ai[i],cols,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1396     row++; array += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1397   }
1398   ierr = PetscFree(ct);CHKERRQ(ierr);
1399   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1400   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1401   if (matout) {
1402     *matout = B;
1403   } else {
1404     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1405   }
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1411 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1412 {
1413   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1414   Mat            a = aij->A,b = aij->B;
1415   PetscErrorCode ierr;
1416   PetscInt       s1,s2,s3;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1420   if (rr) {
1421     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1422     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1423     /* Overlap communication with computation. */
1424     ierr = VecScatterBegin(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1425   }
1426   if (ll) {
1427     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1428     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1429     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
1430   }
1431   /* scale  the diagonal block */
1432   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1433 
1434   if (rr) {
1435     /* Do a scatter end and then right scale the off-diagonal block */
1436     ierr = VecScatterEnd(rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD,aij->Mvctx);CHKERRQ(ierr);
1437     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
1438   }
1439 
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatPrintHelp_MPIAIJ"
1446 PetscErrorCode MatPrintHelp_MPIAIJ(Mat A)
1447 {
1448   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1449   PetscErrorCode ierr;
1450 
1451   PetscFunctionBegin;
1452   if (!a->rank) {
1453     ierr = MatPrintHelp_SeqAIJ(a->A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatSetBlockSize_MPIAIJ"
1460 PetscErrorCode MatSetBlockSize_MPIAIJ(Mat A,PetscInt bs)
1461 {
1462   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1463   PetscErrorCode ierr;
1464 
1465   PetscFunctionBegin;
1466   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1467   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 #undef __FUNCT__
1471 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
1472 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1473 {
1474   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1475   PetscErrorCode ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNCT__
1483 #define __FUNCT__ "MatEqual_MPIAIJ"
1484 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscTruth *flag)
1485 {
1486   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
1487   Mat            a,b,c,d;
1488   PetscTruth     flg;
1489   PetscErrorCode ierr;
1490 
1491   PetscFunctionBegin;
1492   a = matA->A; b = matA->B;
1493   c = matB->A; d = matB->B;
1494 
1495   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1496   if (flg) {
1497     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1498   }
1499   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatCopy_MPIAIJ"
1505 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
1506 {
1507   PetscErrorCode ierr;
1508   Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
1509   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
1510 
1511   PetscFunctionBegin;
1512   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1513   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1514     /* Because of the column compression in the off-processor part of the matrix a->B,
1515        the number of columns in a->B and b->B may be different; hence we cannot call
1516        MatCopy() directly on the two parts. If need be, a copy more efficient than
1517        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
1518        then copying the submatrices */
1519     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1520   } else {
1521     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1522     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1523   }
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatSetUpPreallocation_MPIAIJ"
1529 PetscErrorCode MatSetUpPreallocation_MPIAIJ(Mat A)
1530 {
1531   PetscErrorCode ierr;
1532 
1533   PetscFunctionBegin;
1534   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #include "petscblaslapack.h"
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatAXPY_MPIAIJ"
1541 PetscErrorCode MatAXPY_MPIAIJ(const PetscScalar a[],Mat X,Mat Y,MatStructure str)
1542 {
1543   PetscErrorCode ierr;
1544   PetscInt       i;
1545   Mat_MPIAIJ     *xx = (Mat_MPIAIJ *)X->data,*yy = (Mat_MPIAIJ *)Y->data;
1546   PetscBLASInt   bnz,one=1;
1547   Mat_SeqAIJ     *x,*y;
1548 
1549   PetscFunctionBegin;
1550   if (str == SAME_NONZERO_PATTERN) {
1551     x = (Mat_SeqAIJ *)xx->A->data;
1552     y = (Mat_SeqAIJ *)yy->A->data;
1553     bnz = (PetscBLASInt)x->nz;
1554     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1555     x = (Mat_SeqAIJ *)xx->B->data;
1556     y = (Mat_SeqAIJ *)yy->B->data;
1557     bnz = (PetscBLASInt)x->nz;
1558     BLASaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1559   } else if (str == SUBSET_NONZERO_PATTERN) {
1560     ierr = MatAXPY_SeqAIJ(a,xx->A,yy->A,str);CHKERRQ(ierr);
1561 
1562     x = (Mat_SeqAIJ *)xx->B->data;
1563     y = (Mat_SeqAIJ *)yy->B->data;
1564     if (y->xtoy && y->XtoY != xx->B) {
1565       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1566       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1567     }
1568     if (!y->xtoy) { /* get xtoy */
1569       ierr = MatAXPYGetxtoy_Private(xx->B->m,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
1570       y->XtoY = xx->B;
1571     }
1572     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += (*a)*(x->a[i]);
1573   } else {
1574     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1575   }
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_SeqAIJ(Mat);
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatConjugate_MPIAIJ"
1583 PetscErrorCode PETSCMAT_DLLEXPORT MatConjugate_MPIAIJ(Mat mat)
1584 {
1585 #if defined(PETSC_USE_COMPLEX)
1586   PetscErrorCode ierr;
1587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
1591   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
1592 #else
1593   PetscFunctionBegin;
1594 #endif
1595   PetscFunctionReturn(0);
1596 }
1597 
1598 /* -------------------------------------------------------------------*/
1599 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
1600        MatGetRow_MPIAIJ,
1601        MatRestoreRow_MPIAIJ,
1602        MatMult_MPIAIJ,
1603 /* 4*/ MatMultAdd_MPIAIJ,
1604        MatMultTranspose_MPIAIJ,
1605        MatMultTransposeAdd_MPIAIJ,
1606        0,
1607        0,
1608        0,
1609 /*10*/ 0,
1610        0,
1611        0,
1612        MatRelax_MPIAIJ,
1613        MatTranspose_MPIAIJ,
1614 /*15*/ MatGetInfo_MPIAIJ,
1615        MatEqual_MPIAIJ,
1616        MatGetDiagonal_MPIAIJ,
1617        MatDiagonalScale_MPIAIJ,
1618        MatNorm_MPIAIJ,
1619 /*20*/ MatAssemblyBegin_MPIAIJ,
1620        MatAssemblyEnd_MPIAIJ,
1621        0,
1622        MatSetOption_MPIAIJ,
1623        MatZeroEntries_MPIAIJ,
1624 /*25*/ MatZeroRows_MPIAIJ,
1625        0,
1626        0,
1627        0,
1628        0,
1629 /*30*/ MatSetUpPreallocation_MPIAIJ,
1630        0,
1631        0,
1632        0,
1633        0,
1634 /*35*/ MatDuplicate_MPIAIJ,
1635        0,
1636        0,
1637        0,
1638        0,
1639 /*40*/ MatAXPY_MPIAIJ,
1640        MatGetSubMatrices_MPIAIJ,
1641        MatIncreaseOverlap_MPIAIJ,
1642        MatGetValues_MPIAIJ,
1643        MatCopy_MPIAIJ,
1644 /*45*/ MatPrintHelp_MPIAIJ,
1645        MatScale_MPIAIJ,
1646        0,
1647        0,
1648        0,
1649 /*50*/ MatSetBlockSize_MPIAIJ,
1650        0,
1651        0,
1652        0,
1653        0,
1654 /*55*/ MatFDColoringCreate_MPIAIJ,
1655        0,
1656        MatSetUnfactored_MPIAIJ,
1657        0,
1658        0,
1659 /*60*/ MatGetSubMatrix_MPIAIJ,
1660        MatDestroy_MPIAIJ,
1661        MatView_MPIAIJ,
1662        MatGetPetscMaps_Petsc,
1663        0,
1664 /*65*/ 0,
1665        0,
1666        0,
1667        0,
1668        0,
1669 /*70*/ 0,
1670        0,
1671        MatSetColoring_MPIAIJ,
1672 #if defined(PETSC_HAVE_ADIC)
1673        MatSetValuesAdic_MPIAIJ,
1674 #else
1675        0,
1676 #endif
1677        MatSetValuesAdifor_MPIAIJ,
1678 /*75*/ 0,
1679        0,
1680        0,
1681        0,
1682        0,
1683 /*80*/ 0,
1684        0,
1685        0,
1686        0,
1687 /*84*/ MatLoad_MPIAIJ,
1688        0,
1689        0,
1690        0,
1691        0,
1692        0,
1693 /*90*/ MatMatMult_MPIAIJ_MPIAIJ,
1694        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
1695        MatMatMultNumeric_MPIAIJ_MPIAIJ,
1696        MatPtAP_Basic,
1697        MatPtAPSymbolic_MPIAIJ,
1698 /*95*/ MatPtAPNumeric_MPIAIJ,
1699        0,
1700        0,
1701        0,
1702        0,
1703 /*100*/0,
1704        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
1705        MatPtAPNumeric_MPIAIJ_MPIAIJ,
1706        MatConjugate_MPIAIJ
1707 };
1708 
1709 /* ----------------------------------------------------------------------------------------*/
1710 
1711 EXTERN_C_BEGIN
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatStoreValues_MPIAIJ"
1714 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIAIJ(Mat mat)
1715 {
1716   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1717   PetscErrorCode ierr;
1718 
1719   PetscFunctionBegin;
1720   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
1721   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
1722   PetscFunctionReturn(0);
1723 }
1724 EXTERN_C_END
1725 
1726 EXTERN_C_BEGIN
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
1729 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIAIJ(Mat mat)
1730 {
1731   Mat_MPIAIJ     *aij = (Mat_MPIAIJ *)mat->data;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
1736   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
1737   PetscFunctionReturn(0);
1738 }
1739 EXTERN_C_END
1740 
1741 #include "petscpc.h"
1742 EXTERN_C_BEGIN
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
1745 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1746 {
1747   Mat_MPIAIJ     *b;
1748   PetscErrorCode ierr;
1749   PetscInt       i;
1750 
1751   PetscFunctionBegin;
1752   B->preallocated = PETSC_TRUE;
1753   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1754   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1755   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1756   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1757   if (d_nnz) {
1758     for (i=0; i<B->m; i++) {
1759       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1760     }
1761   }
1762   if (o_nnz) {
1763     for (i=0; i<B->m; i++) {
1764       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1765     }
1766   }
1767   b = (Mat_MPIAIJ*)B->data;
1768   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
1769   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
1770 
1771   PetscFunctionReturn(0);
1772 }
1773 EXTERN_C_END
1774 
1775 #undef __FUNCT__
1776 #define __FUNCT__ "MatDuplicate_MPIAIJ"
1777 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1778 {
1779   Mat            mat;
1780   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
1781   PetscErrorCode ierr;
1782 
1783   PetscFunctionBegin;
1784   *newmat       = 0;
1785   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1786   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1787   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1788   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1789   a    = (Mat_MPIAIJ*)mat->data;
1790 
1791   mat->factor       = matin->factor;
1792   mat->bs           = matin->bs;
1793   mat->assembled    = PETSC_TRUE;
1794   mat->insertmode   = NOT_SET_VALUES;
1795   mat->preallocated = PETSC_TRUE;
1796 
1797   a->rstart       = oldmat->rstart;
1798   a->rend         = oldmat->rend;
1799   a->cstart       = oldmat->cstart;
1800   a->cend         = oldmat->cend;
1801   a->size         = oldmat->size;
1802   a->rank         = oldmat->rank;
1803   a->donotstash   = oldmat->donotstash;
1804   a->roworiented  = oldmat->roworiented;
1805   a->rowindices   = 0;
1806   a->rowvalues    = 0;
1807   a->getrowactive = PETSC_FALSE;
1808 
1809   ierr       = PetscMemcpy(a->rowners,oldmat->rowners,2*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1810   ierr       = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1811   if (oldmat->colmap) {
1812 #if defined (PETSC_USE_CTABLE)
1813     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1814 #else
1815     ierr = PetscMalloc((mat->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1816     ierr = PetscLogObjectMemory(mat,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1817     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(mat->N)*sizeof(PetscInt));CHKERRQ(ierr);
1818 #endif
1819   } else a->colmap = 0;
1820   if (oldmat->garray) {
1821     PetscInt len;
1822     len  = oldmat->B->n;
1823     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1824     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1825     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
1826   } else a->garray = 0;
1827 
1828   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1829   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1830   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1831   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1832   ierr =  MatDestroy(a->A);CHKERRQ(ierr);
1833   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1834   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1835   ierr =  MatDestroy(a->B);CHKERRQ(ierr);
1836   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1837   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1838   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1839   *newmat = mat;
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #include "petscsys.h"
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatLoad_MPIAIJ"
1847 PetscErrorCode MatLoad_MPIAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1848 {
1849   Mat            A;
1850   PetscScalar    *vals,*svals;
1851   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1852   MPI_Status     status;
1853   PetscErrorCode ierr;
1854   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,maxnz;
1855   PetscInt       i,nz,j,rstart,rend,mmax;
1856   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1857   PetscInt       *ourlens,*procsnz = 0,*offlens,jj,*mycols,*smycols;
1858   PetscInt       cend,cstart,n,*rowners;
1859   int            fd;
1860 
1861   PetscFunctionBegin;
1862   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1863   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1864   if (!rank) {
1865     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1866     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1867     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1868   }
1869 
1870   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1871   M = header[1]; N = header[2];
1872   /* determine ownership of all rows */
1873   m    = M/size + ((M % size) > rank);
1874   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
1875   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
1876 
1877   /* First process needs enough room for process with most rows */
1878   if (!rank) {
1879     mmax       = rowners[1];
1880     for (i=2; i<size; i++) {
1881       mmax = PetscMax(mmax,rowners[i]);
1882     }
1883   } else mmax = m;
1884 
1885   rowners[0] = 0;
1886   for (i=2; i<=size; i++) {
1887     mmax       = PetscMax(mmax,rowners[i]);
1888     rowners[i] += rowners[i-1];
1889   }
1890   rstart = rowners[rank];
1891   rend   = rowners[rank+1];
1892 
1893   /* distribute row lengths to all processors */
1894   ierr    = PetscMalloc2(mmax,PetscInt,&ourlens,mmax,PetscInt,&offlens);CHKERRQ(ierr);
1895   if (!rank) {
1896     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
1897     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
1898     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
1899     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
1900     for (j=0; j<m; j++) {
1901       procsnz[0] += ourlens[j];
1902     }
1903     for (i=1; i<size; i++) {
1904       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
1905       /* calculate the number of nonzeros on each processor */
1906       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
1907         procsnz[i] += rowlengths[j];
1908       }
1909       ierr = MPI_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1910     }
1911     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
1912   } else {
1913     ierr = MPI_Recv(ourlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1914   }
1915 
1916   if (!rank) {
1917     /* determine max buffer needed and allocate it */
1918     maxnz = 0;
1919     for (i=0; i<size; i++) {
1920       maxnz = PetscMax(maxnz,procsnz[i]);
1921     }
1922     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1923 
1924     /* read in my part of the matrix column indices  */
1925     nz   = procsnz[0];
1926     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1927     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
1928 
1929     /* read in everyone else's parts and ship them off */
1930     for (i=1; i<size; i++) {
1931       nz   = procsnz[i];
1932       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
1933       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
1934     }
1935     ierr = PetscFree(cols);CHKERRQ(ierr);
1936   } else {
1937     /* determine buffer space needed for message */
1938     nz = 0;
1939     for (i=0; i<m; i++) {
1940       nz += ourlens[i];
1941     }
1942     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
1943 
1944     /* receive message of column indices */
1945     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
1946     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
1947     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
1948   }
1949 
1950   /* determine column ownership if matrix is not square */
1951   if (N != M) {
1952     n      = N/size + ((N % size) > rank);
1953     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1954     cstart = cend - n;
1955   } else {
1956     cstart = rstart;
1957     cend   = rend;
1958     n      = cend - cstart;
1959   }
1960 
1961   /* loop over local rows, determining number of off diagonal entries */
1962   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
1963   jj = 0;
1964   for (i=0; i<m; i++) {
1965     for (j=0; j<ourlens[i]; j++) {
1966       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
1967       jj++;
1968     }
1969   }
1970 
1971   /* create our matrix */
1972   for (i=0; i<m; i++) {
1973     ourlens[i] -= offlens[i];
1974   }
1975   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
1976   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
1977   ierr = MatSetType(A,type);CHKERRQ(ierr);
1978   ierr = MatMPIAIJSetPreallocation(A,0,ourlens,0,offlens);CHKERRQ(ierr);
1979 
1980   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
1981   for (i=0; i<m; i++) {
1982     ourlens[i] += offlens[i];
1983   }
1984 
1985   if (!rank) {
1986     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
1987 
1988     /* read in my part of the matrix numerical values  */
1989     nz   = procsnz[0];
1990     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1991 
1992     /* insert into matrix */
1993     jj      = rstart;
1994     smycols = mycols;
1995     svals   = vals;
1996     for (i=0; i<m; i++) {
1997       ierr = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
1998       smycols += ourlens[i];
1999       svals   += ourlens[i];
2000       jj++;
2001     }
2002 
2003     /* read in the other processes' parts and ship them out */
2004     for (i=1; i<size; i++) {
2005       nz   = procsnz[i];
2006       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2007       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2008     }
2009     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2010   } else {
2011     /* receive numeric values */
2012     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
2013 
2014     /* receive message of values */
2015     ierr = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2016     ierr = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2017     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2018 
2019     /* insert into matrix */
2020     jj      = rstart;
2021     smycols = mycols;
2022     svals   = vals;
2023     for (i=0; i<m; i++) {
2024       ierr     = MatSetValues_MPIAIJ(A,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
2025       smycols += ourlens[i];
2026       svals   += ourlens[i];
2027       jj++;
2028     }
2029   }
2030   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
2031   ierr = PetscFree(vals);CHKERRQ(ierr);
2032   ierr = PetscFree(mycols);CHKERRQ(ierr);
2033   ierr = PetscFree(rowners);CHKERRQ(ierr);
2034 
2035   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2036   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2037   *newmat = A;
2038   PetscFunctionReturn(0);
2039 }
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
2043 /*
2044     Not great since it makes two copies of the submatrix: first a SeqAIJ matrix on
2045   each process, and then the end result obtained by concatenating those local
2046   matrices. Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
2047 */
2048 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2049 {
2050   PetscErrorCode ierr;
2051   PetscMPIInt    rank,size;
2052   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j;
2053   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2054   Mat            *local,M,Mreuse;
2055   PetscScalar    *vwork,*aa;
2056   MPI_Comm       comm = mat->comm;
2057   Mat_SeqAIJ     *aij;
2058 
2059 
2060   PetscFunctionBegin;
2061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2062   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2063 
2064   if (call ==  MAT_REUSE_MATRIX) {
2065     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2066     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2067     local = &Mreuse;
2068     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2069   } else {
2070     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2071     Mreuse = *local;
2072     ierr   = PetscFree(local);CHKERRQ(ierr);
2073   }
2074 
2075   /*
2076       m - number of local rows
2077       n - number of columns (same on all processors)
2078       rstart - first row in new global matrix generated
2079   */
2080   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2081   if (call == MAT_INITIAL_MATRIX) {
2082     aij = (Mat_SeqAIJ*)(Mreuse)->data;
2083     ii  = aij->i;
2084     jj  = aij->j;
2085 
2086     /*
2087         Determine the number of non-zeros in the diagonal and off-diagonal
2088         portions of the matrix in order to do correct preallocation
2089     */
2090 
2091     /* first get start and end of "diagonal" columns */
2092     if (csize == PETSC_DECIDE) {
2093       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2094       if (mglobal == n) { /* square matrix */
2095         nlocal = m;
2096       } else {
2097         nlocal = n/size + ((n % size) > rank);
2098       }
2099     } else {
2100       nlocal = csize;
2101     }
2102     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2103     rstart = rend - nlocal;
2104     if (rank == size - 1 && rend != n) {
2105       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2106     }
2107 
2108     /* next, compute all the lengths */
2109     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2110     olens = dlens + m;
2111     for (i=0; i<m; i++) {
2112       jend = ii[i+1] - ii[i];
2113       olen = 0;
2114       dlen = 0;
2115       for (j=0; j<jend; j++) {
2116         if (*jj < rstart || *jj >= rend) olen++;
2117         else dlen++;
2118         jj++;
2119       }
2120       olens[i] = olen;
2121       dlens[i] = dlen;
2122     }
2123     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2124     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
2125     ierr = MatSetType(M,mat->type_name);CHKERRQ(ierr);
2126     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
2127     ierr = PetscFree(dlens);CHKERRQ(ierr);
2128   } else {
2129     PetscInt ml,nl;
2130 
2131     M = *newmat;
2132     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2133     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2134     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2135     /*
2136          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2137        rather than the slower MatSetValues().
2138     */
2139     M->was_assembled = PETSC_TRUE;
2140     M->assembled     = PETSC_FALSE;
2141   }
2142   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2143   aij = (Mat_SeqAIJ*)(Mreuse)->data;
2144   ii  = aij->i;
2145   jj  = aij->j;
2146   aa  = aij->a;
2147   for (i=0; i<m; i++) {
2148     row   = rstart + i;
2149     nz    = ii[i+1] - ii[i];
2150     cwork = jj;     jj += nz;
2151     vwork = aa;     aa += nz;
2152     ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2153   }
2154 
2155   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   *newmat = M;
2158 
2159   /* save the submatrix used on this process for the next reuse request */
2160   if (call ==  MAT_INITIAL_MATRIX) {
2161     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2162     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2163   }
2164 
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 EXTERN_C_BEGIN
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2172 {
2173   Mat_MPIAIJ     *b = (Mat_MPIAIJ *)B->data;
2174   PetscInt       m = B->m,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2175   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2176   const PetscInt *JJ;
2177   PetscScalar    *values;
2178   PetscErrorCode ierr;
2179 
2180   PetscFunctionBegin;
2181 #if defined(PETSC_OPT_g)
2182   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2183 #endif
2184   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2185   o_nnz = d_nnz + m;
2186 
2187   for (i=0; i<m; i++) {
2188     nnz     = I[i+1]- I[i];
2189     JJ      = J + I[i];
2190     nnz_max = PetscMax(nnz_max,nnz);
2191 #if defined(PETSC_OPT_g)
2192     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2193 #endif
2194     for (j=0; j<nnz; j++) {
2195       if (*JJ >= cstart) break;
2196       JJ++;
2197     }
2198     d = 0;
2199     for (; j<nnz; j++) {
2200       if (*JJ++ >= cend) break;
2201       d++;
2202     }
2203     d_nnz[i] = d;
2204     o_nnz[i] = nnz - d;
2205   }
2206   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2207   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2208 
2209   if (v) values = (PetscScalar*)v;
2210   else {
2211     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2212     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2213   }
2214 
2215   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2216   for (i=0; i<m; i++) {
2217     ii   = i + rstart;
2218     nnz  = I[i+1]- I[i];
2219     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+I[i],values+(v ? I[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2220   }
2221   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2222   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2223   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2224 
2225   if (!v) {
2226     ierr = PetscFree(values);CHKERRQ(ierr);
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 EXTERN_C_END
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
2234 /*@C
2235    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2236    (the default parallel PETSc format).
2237 
2238    Collective on MPI_Comm
2239 
2240    Input Parameters:
2241 +  B - the matrix
2242 .  i - the indices into j for the start of each local row (starts with zero)
2243 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2244 -  v - optional values in the matrix
2245 
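   Example usage - a minimal sketch, not part of the interface definition: it assumes a
   4x4 tridiagonal matrix distributed two rows per process over two processes, with B
   already created via MatCreate()/MatSetSizes()/MatSetType() and rank and ierr set up
   by the caller (gr is the global row index):

.vb
   PetscInt    i[3],j[6],nz = 0,r,gr,c;
   PetscScalar v[6];

   i[0] = 0;
   for (r=0; r<2; r++) {
     gr = 2*rank + r;
     for (c=gr-1; c<=gr+1; c++) {
       if (c >= 0 && c < 4) {j[nz] = c; v[nz] = 1.0; nz++;}
     }
     i[r+1] = nz;
   }
   ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve
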
2246    Level: developer
2247 
2248 .keywords: matrix, aij, compressed row, sparse, parallel
2249 
2250 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2251 @*/
2252 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2253 {
2254   PetscErrorCode ierr,(*f)(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]);
2255 
2256   PetscFunctionBegin;
2257   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2258   if (f) {
2259     ierr = (*f)(B,i,j,v);CHKERRQ(ierr);
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatMPIAIJSetPreallocation"
2266 /*@C
2267    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
2268    (the default parallel PETSc format).  For good matrix assembly performance
2269    the user should preallocate the matrix storage by setting the parameters
2270    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2271    performance can be increased by more than a factor of 50.
2272 
2273    Collective on MPI_Comm
2274 
2275    Input Parameters:
2276 +  A - the matrix
2277 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2278            (same value is used for all local rows)
2279 .  d_nnz - array containing the number of nonzeros in the various rows of the
2280            DIAGONAL portion of the local submatrix (possibly different for each row)
2281            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2282            The size of this array is equal to the number of local rows, i.e 'm'.
2283            You must leave room for the diagonal entry even if it is zero.
2284 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2285            submatrix (same value is used for all local rows).
2286 -  o_nnz - array containing the number of nonzeros in the various rows of the
2287            OFF-DIAGONAL portion of the local submatrix (possibly different for
2288            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2289            structure. The size of this array is equal to the number
2290            of local rows, i.e 'm'.
2291 
2292    If the *_nnz parameter is given then the *_nz parameter is ignored
2293 
2294    The AIJ format (also called the Yale sparse matrix format or
2295    compressed row storage (CSR)) is fully compatible with standard Fortran 77
2296    storage.  The stored row and column indices begin with zero.  See the users manual for details.
2297 
2298    The parallel matrix is partitioned such that the first m0 rows belong to
2299    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2300    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2301 
2302    The DIAGONAL portion of the local submatrix of a processor can be defined
2303    as the submatrix obtained by extracting the part corresponding to the rows
2304    r1-r2 and columns r1-r2 of the global matrix, where r1 is the first row that
2305    belongs to the processor and r2 is the last row belonging to this processor.
2306    This is a square mxm matrix. The remaining portion of the local submatrix (the
2307    m local rows by the remaining global columns) constitutes the OFF-DIAGONAL portion.
2308 
2309    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2310 
2311    Example usage:
2312 
2313    Consider the following 8x8 matrix with 34 non-zero values that is
2314    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2315    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2316    as follows:
2317 
2318 .vb
2319             1  2  0  |  0  3  0  |  0  4
2320     Proc0   0  5  6  |  7  0  0  |  8  0
2321             9  0 10  | 11  0  0  | 12  0
2322     -------------------------------------
2323            13  0 14  | 15 16 17  |  0  0
2324     Proc1   0 18  0  | 19 20 21  |  0  0
2325             0  0  0  | 22 23  0  | 24  0
2326     -------------------------------------
2327     Proc2  25 26 27  |  0  0 28  | 29  0
2328            30  0  0  | 31 32 33  |  0 34
2329 .ve
2330 
2331    This can be represented as a collection of submatrices as:
2332 
2333 .vb
2334       A B C
2335       D E F
2336       G H I
2337 .ve
2338 
2339    Where the submatrices A,B,C are owned by proc0, D,E,F are
2340    owned by proc1, G,H,I are owned by proc2.
2341 
2342    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2343    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2344    The 'M','N' parameters are 8,8, and have the same values on all procs.
2345 
2346    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2347    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2348    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2349    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2350    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
2351    matrix and [DF] as another SeqAIJ matrix.
2352 
2353    When d_nz, o_nz parameters are specified, d_nz storage elements are
2354    allocated for every row of the local diagonal submatrix, and o_nz
2355    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2356    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2357    any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2358    In this case, the values of d_nz,o_nz are:
2359 .vb
2360      proc0 : dnz = 2, o_nz = 2
2361      proc1 : dnz = 3, o_nz = 2
2362      proc2 : dnz = 1, o_nz = 4
2363 .ve
2364    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2365    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2366    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2367    34 values.
2368 
2369    When d_nnz, o_nnz parameters are specified, the storage is specified
2370    for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
2371    In the above case the values for d_nnz,o_nnz are:
2372 .vb
2373      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2374      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2375      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2376 .ve
2377    Here the space allocated is the sum of all the above values, i.e. 34, and
2378    hence pre-allocation is perfect.
2379 
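   A minimal usage sketch for the case above (illustrative only: the communicator comm
   and the error handling are assumed to be set up by the caller, and the sizes and
   per-row counts shown are those of proc0 in the example; every process passes its
   own local values):

.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
   ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
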
2380    Level: intermediate
2381 
2382 .keywords: matrix, aij, compressed row, sparse, parallel
2383 
2384 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIAIJ(), MatMPIAIJSetPreallocationCSR(),
2385           MPIAIJ
2386 @*/
2387 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2388 {
2389   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2390 
2391   PetscFunctionBegin;
2392   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2393   if (f) {
2394     ierr = (*f)(B,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatCreateMPIAIJ"
2401 /*@C
2402    MatCreateMPIAIJ - Creates a sparse parallel matrix in AIJ format
2403    (the default parallel PETSc format).  For good matrix assembly performance
2404    the user should preallocate the matrix storage by setting the parameters
2405    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2406    performance can be increased by more than a factor of 50.
2407 
2408    Collective on MPI_Comm
2409 
2410    Input Parameters:
2411 +  comm - MPI communicator
2412 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2413            This value should be the same as the local size used in creating the
2414            y vector for the matrix-vector product y = Ax.
2415 .  n - This value should be the same as the local size used in creating the
2416        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2417        calculated if N is given) For square matrices n is almost always m.
2418 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2419 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2420 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
2421            (same value is used for all local rows)
2422 .  d_nnz - array containing the number of nonzeros in the various rows of the
2423            DIAGONAL portion of the local submatrix (possibly different for each row)
2424            or PETSC_NULL, if d_nz is used to specify the nonzero structure.
2425            The size of this array is equal to the number of local rows, i.e 'm'.
2426            You must leave room for the diagonal entry even if it is zero.
2427 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
2428            submatrix (same value is used for all local rows).
2429 -  o_nnz - array containing the number of nonzeros in the various rows of the
2430            OFF-DIAGONAL portion of the local submatrix (possibly different for
2431            each row) or PETSC_NULL, if o_nz is used to specify the nonzero
2432            structure. The size of this array is equal to the number
2433            of local rows, i.e 'm'.
2434 
2435    Output Parameter:
2436 .  A - the matrix
2437 
2438    Notes:
2439    If the *_nnz parameter is given then the *_nz parameter is ignored
2440 
2441    m,n,M,N parameters specify the size of the matrix, and its partitioning across
2442    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
2443    storage requirements for this matrix.
2444 
2445    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
2446    processor then it must be used on all processors that share the object for
2447    that argument.
2448 
2449    The user MUST specify either the local or global matrix dimensions
2450    (possibly both).
2451 
2452    The parallel matrix is partitioned such that the first m0 rows belong to
2453    process 0, the next m1 rows belong to process 1, the next m2 rows belong
2454    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
2455 
2456    The DIAGONAL portion of the local submatrix of a processor can be defined
2457    as the submatrix obtained by extracting the part corresponding to the rows
2458    r1-r2 and columns r1-r2 of the global matrix, where r1 is the first row that
2459    belongs to the processor and r2 is the last row belonging to this processor.
2460    This is a square mxm matrix. The remaining portion of the local submatrix
2461    (of size m by N-n) constitutes the OFF-DIAGONAL portion.
2462 
2463    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
2464 
2465    When calling this routine with a single process communicator, a matrix of
2466    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
2467    type of communicator, use the construction mechanism:
2468      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatMPIAIJSetPreallocation(A,...);
2469 
2470    By default, this format uses inodes (identical nodes) when possible.
2471    We search for consecutive rows with the same nonzero structure, thereby
2472    reusing matrix information to achieve increased efficiency.
2473 
2474    Options Database Keys:
2475 +  -mat_no_inode  - Do not use inodes
2476 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
2477 -  -mat_aij_oneindex - Internally use indexing starting at 1
2478         rather than 0.  Note that when calling MatSetValues(),
2479         the user still MUST index entries starting at 0!
2480 
2481 
2482    Example usage:
2483 
2484    Consider the following 8x8 matrix with 34 non-zero values that is
2485    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
2486    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
2487    as follows:
2488 
2489 .vb
2490             1  2  0  |  0  3  0  |  0  4
2491     Proc0   0  5  6  |  7  0  0  |  8  0
2492             9  0 10  | 11  0  0  | 12  0
2493     -------------------------------------
2494            13  0 14  | 15 16 17  |  0  0
2495     Proc1   0 18  0  | 19 20 21  |  0  0
2496             0  0  0  | 22 23  0  | 24  0
2497     -------------------------------------
2498     Proc2  25 26 27  |  0  0 28  | 29  0
2499            30  0  0  | 31 32 33  |  0 34
2500 .ve
2501 
2502    This can be represented as a collection of submatrices as:
2503 
2504 .vb
2505       A B C
2506       D E F
2507       G H I
2508 .ve
2509 
2510    Where the submatrices A,B,C are owned by proc0, D,E,F are
2511    owned by proc1, G,H,I are owned by proc2.
2512 
2513    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2514    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
2515    The 'M','N' parameters are 8,8, and have the same values on all procs.
2516 
2517    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
2518    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
2519    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
2520    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
2521    part as SeqAIJ matrices; for example, proc1 will store [E] as one SeqAIJ
2522    matrix and [DF] as another SeqAIJ matrix.
2523 
2524    When d_nz, o_nz parameters are specified, d_nz storage elements are
2525    allocated for every row of the local diagonal submatrix, and o_nz
2526    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
2527    One way to choose d_nz and o_nz is to use the maximum number of nonzeros in
2528    any local row of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
2529    In this case, the values of d_nz,o_nz are:
2530 .vb
2531      proc0 : dnz = 2, o_nz = 2
2532      proc1 : dnz = 3, o_nz = 2
2533      proc2 : dnz = 1, o_nz = 4
2534 .ve
2535    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
2536    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
2537    for proc2, i.e. we are using 12+15+10=37 storage locations to store
2538    34 values.
2539 
2540    When d_nnz, o_nnz parameters are specified, the storage is specified
2541    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
2542    In the above case the values for d_nnz,o_nnz are:
2543 .vb
2544      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
2545      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
2546      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
2547 .ve
2548    Here the space allocated is the sum of all the above values, i.e. 34, and
2549    hence pre-allocation is perfect.
2550 
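   A minimal usage sketch (again illustrative, reusing proc0 of the example above;
   each process would pass its own local sizes and nonzero counts):

.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

   ierr = MatCreateMPIAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
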
2551    Level: intermediate
2552 
2553 .keywords: matrix, aij, compressed row, sparse, parallel
2554 
2555 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2556           MPIAIJ
2557 @*/
2558 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2559 {
2560   PetscErrorCode ierr;
2561   PetscMPIInt    size;
2562 
2563   PetscFunctionBegin;
2564   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2565   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2566   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2567   if (size > 1) {
2568     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
2569     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2570   } else {
2571     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
2572     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
2573   }
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 #undef __FUNCT__
2578 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
2579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2580 {
2581   Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
2582 
2583   PetscFunctionBegin;
2584   *Ad     = a->A;
2585   *Ao     = a->B;
2586   *colmap = a->garray;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatSetColoring_MPIAIJ"
2592 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
2593 {
2594   PetscErrorCode ierr;
2595   PetscInt       i;
2596   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2597 
2598   PetscFunctionBegin;
2599   if (coloring->ctype == IS_COLORING_LOCAL) {
2600     ISColoringValue *allcolors,*colors;
2601     ISColoring      ocoloring;
2602 
2603     /* set coloring for diagonal portion */
2604     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
2605 
2606     /* set coloring for off-diagonal portion */
2607     ierr = ISAllGatherColors(A->comm,coloring->n,coloring->colors,PETSC_NULL,&allcolors);CHKERRQ(ierr);
2608     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2609     for (i=0; i<a->B->n; i++) {
2610       colors[i] = allcolors[a->garray[i]];
2611     }
2612     ierr = PetscFree(allcolors);CHKERRQ(ierr);
2613     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2614     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2615     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2616   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
2617     ISColoringValue *colors;
2618     PetscInt        *larray;
2619     ISColoring      ocoloring;
2620 
2621     /* set coloring for diagonal portion */
2622     ierr = PetscMalloc((a->A->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2623     for (i=0; i<a->A->n; i++) {
2624       larray[i] = i + a->cstart;
2625     }
2626     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->A->n,larray,PETSC_NULL,larray);CHKERRQ(ierr);
2627     ierr = PetscMalloc((a->A->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2628     for (i=0; i<a->A->n; i++) {
2629       colors[i] = coloring->colors[larray[i]];
2630     }
2631     ierr = PetscFree(larray);CHKERRQ(ierr);
2632     ierr = ISColoringCreate(PETSC_COMM_SELF,a->A->n,colors,&ocoloring);CHKERRQ(ierr);
2633     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
2634     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2635 
2636     /* set coloring for off-diagonal portion */
2637     ierr = PetscMalloc((a->B->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
2638     ierr = ISGlobalToLocalMappingApply(A->mapping,IS_GTOLM_MASK,a->B->n,a->garray,PETSC_NULL,larray);CHKERRQ(ierr);
2639     ierr = PetscMalloc((a->B->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
2640     for (i=0; i<a->B->n; i++) {
2641       colors[i] = coloring->colors[larray[i]];
2642     }
2643     ierr = PetscFree(larray);CHKERRQ(ierr);
2644     ierr = ISColoringCreate(MPI_COMM_SELF,a->B->n,colors,&ocoloring);CHKERRQ(ierr);
2645     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
2646     ierr = ISColoringDestroy(ocoloring);CHKERRQ(ierr);
2647   } else {
2648     SETERRQ1(PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
2649   }
2650 
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #if defined(PETSC_HAVE_ADIC)
2655 #undef __FUNCT__
2656 #define __FUNCT__ "MatSetValuesAdic_MPIAIJ"
2657 PetscErrorCode MatSetValuesAdic_MPIAIJ(Mat A,void *advalues)
2658 {
2659   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2660   PetscErrorCode ierr;
2661 
2662   PetscFunctionBegin;
2663   ierr = MatSetValuesAdic_SeqAIJ(a->A,advalues);CHKERRQ(ierr);
2664   ierr = MatSetValuesAdic_SeqAIJ(a->B,advalues);CHKERRQ(ierr);
2665   PetscFunctionReturn(0);
2666 }
2667 #endif
2668 
2669 #undef __FUNCT__
2670 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
2671 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
2672 {
2673   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2674   PetscErrorCode ierr;
2675 
2676   PetscFunctionBegin;
2677   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
2678   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
2679   PetscFunctionReturn(0);
2680 }
2681 
2682 #undef __FUNCT__
2683 #define __FUNCT__ "MatMerge"
2684 /*@C
2685       MatMerge - Creates a single large parallel PETSc matrix by concatenating the
2686                  sequential matrices from each processor
2687 
2688     Collective on MPI_Comm
2689 
2690    Input Parameters:
2691 +    comm - the communicator the parallel matrix will live on
2692 .    inmat - the input sequential matrix (one per process)
2693 .    n - number of local columns (or PETSC_DECIDE)
2694 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2695 
2696    Output Parameter:
2697 .    outmat - the parallel matrix generated
2698 
2699     Level: advanced
2700 
2701    Notes: The number of columns of the matrix in EACH processor MUST be the same.
2702 
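   A minimal usage sketch (inmat is assumed to be an assembled SeqAIJ matrix that each
   process has built locally; note that this routine destroys inmat):

.vb
   Mat C;

   ierr = MatMerge(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
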
2703 @*/
2704 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
2705 {
2706   PetscErrorCode ierr;
2707   PetscInt       m,N,i,rstart,nnz,I,*dnz,*onz;
2708   PetscInt       *indx;
2709   PetscScalar    *values;
2710   PetscMap       columnmap,rowmap;
2711 
2712   PetscFunctionBegin;
2713   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
2714   /*
2715   PetscMPIInt       rank;
2716   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2717   ierr = PetscPrintf(PETSC_COMM_SELF," [%d] inmat m=%d, n=%d, N=%d\n",rank,m,n,N);
2718   */
2719   if (scall == MAT_INITIAL_MATRIX){
2720     /* count nonzeros in each row, for diagonal and off diagonal portion of matrix */
2721     if (n == PETSC_DECIDE){
2722       ierr = PetscMapCreate(comm,&columnmap);CHKERRQ(ierr);
2723       ierr = PetscMapSetSize(columnmap,N);CHKERRQ(ierr);
2724       ierr = PetscMapSetType(columnmap,MAP_MPI);CHKERRQ(ierr);
2725       ierr = PetscMapGetLocalSize(columnmap,&n);CHKERRQ(ierr);
2726       ierr = PetscMapDestroy(columnmap);CHKERRQ(ierr);
2727     }
2728 
2729     ierr = PetscMapCreate(comm,&rowmap);CHKERRQ(ierr);
2730     ierr = PetscMapSetLocalSize(rowmap,m);CHKERRQ(ierr);
2731     ierr = PetscMapSetType(rowmap,MAP_MPI);CHKERRQ(ierr);
2732     ierr = PetscMapGetLocalRange(rowmap,&rstart,0);CHKERRQ(ierr);
2733     ierr = PetscMapDestroy(rowmap);CHKERRQ(ierr);
2734 
2735     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
2736     for (i=0;i<m;i++) {
2737       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2738       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
2739       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,PETSC_NULL);CHKERRQ(ierr);
2740     }
2741     /* This routine will ONLY return an MPIAIJ type matrix */
2742     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
2743     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2744     ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
2745     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
2746     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
2747 
2748   } else if (scall == MAT_REUSE_MATRIX){
2749     ierr = MatGetOwnershipRange(*outmat,&rstart,PETSC_NULL);CHKERRQ(ierr);
2750   } else {
2751     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
2752   }
2753 
2754   for (i=0;i<m;i++) {
2755     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2756     I    = i + rstart;
2757     ierr = MatSetValues(*outmat,1,&I,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2758     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
2759   }
2760   ierr = MatDestroy(inmat);CHKERRQ(ierr);
2761   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2762   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763 
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatFileSplit"
2769 PetscErrorCode MatFileSplit(Mat A,char *outfile)
2770 {
2771   PetscErrorCode    ierr;
2772   PetscMPIInt       rank;
2773   PetscInt          m,N,i,rstart,nnz;
2774   size_t            len;
2775   const PetscInt    *indx;
2776   PetscViewer       out;
2777   char              *name;
2778   Mat               B;
2779   const PetscScalar *values;
2780 
2781   PetscFunctionBegin;
2782   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
2783   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
2784   /* Should this be the type of the diagonal block of A? */
2785   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2786   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
2787   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2788   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
2789   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
2790   for (i=0;i<m;i++) {
2791     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2792     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
2793     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
2794   }
2795   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2796   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797 
2798   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2799   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
2800   ierr = PetscMalloc((len+16)*sizeof(char),&name);CHKERRQ(ierr); /* leave room for the ".<rank>" suffix even with many processes */
2801   sprintf(name,"%s.%d",outfile,rank);
2802   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,PETSC_FILE_CREATE,&out);CHKERRQ(ierr);
2803   ierr = PetscFree(name);CHKERRQ(ierr);
2804   ierr = MatView(B,out);CHKERRQ(ierr);
2805   ierr = PetscViewerDestroy(out);CHKERRQ(ierr);
2806   ierr = MatDestroy(B);CHKERRQ(ierr);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 EXTERN PetscErrorCode MatDestroy_MPIAIJ(Mat);
2811 #undef __FUNCT__
2812 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
2813 PetscErrorCode PETSCMAT_DLLEXPORT MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
2814 {
2815   PetscErrorCode       ierr;
2816   Mat_Merge_SeqsToMPI  *merge;
2817   PetscObjectContainer container;
2818 
2819   PetscFunctionBegin;
2820   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2821   if (container) {
2822     ierr = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2823     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
2824     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
2825     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
2826     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
2827     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
2828     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
2829     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
2830     ierr = PetscMapDestroy(merge->rowmap);CHKERRQ(ierr);
2831     if (merge->coi){ierr = PetscFree(merge->coi);CHKERRQ(ierr);}
2832     if (merge->coj){ierr = PetscFree(merge->coj);CHKERRQ(ierr);}
2833     if (merge->owners_co){ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);}
2834 
2835     ierr = PetscObjectContainerDestroy(container);CHKERRQ(ierr);
2836     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
2837     ierr = PetscFree(merge);CHKERRQ(ierr);
2838   }
2839 
2840   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 #include "src/mat/utils/freespace.h"
2845 #include "petscbt.h"
2846 static PetscEvent logkey_seqstompinum = 0;
2847 #undef __FUNCT__
2848 #define __FUNCT__ "MatMerge_SeqsToMPINumeric"
2849 /*@C
2850       MatMerge_SeqsToMPI - Creates an MPIAIJ matrix by adding together the sequential
2851                  matrices from each processor
2852 
2853     Collective on MPI_Comm
2854 
2855    Input Parameters:
2856 +    comm - the communicator the parallel matrix will live on
2857 .    seqmat - the input sequential matrix (one per process)
2858 .    m - number of local rows (or PETSC_DECIDE)
2859 .    n - number of local columns (or PETSC_DECIDE)
2860 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
2861 
2862    Output Parameter:
2863 .    mpimat - the parallel matrix generated
2864 
2865     Level: advanced
2866 
2867    Notes:
2868      The sequential matrix on each processor MUST have the same dimensions.
2869      The input seqmat is kept in the container "MatMergeSeqsToMPI" attached to mpimat, and will be
2870      destroyed when mpimat is destroyed. Call PetscObjectQuery() on mpimat to access it.
2871 @*/
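/*
   Minimal usage sketch for the interface documented above (assumes every process has
   already assembled a SeqAIJ matrix seqmat of identical global dimensions; the names
   and the communicator are illustrative, and error handling beyond CHKERRQ is elided):

      Mat mpimat;
      ierr = MatMerge_SeqsToMPI(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
      ... use mpimat; a later call with MAT_REUSE_MATRIX refreshes only the numerical values ...
      ierr = MatDestroy(mpimat);CHKERRQ(ierr);   as noted above, seqmat is cleaned up with mpimat
*/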
2872 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPINumeric(Mat seqmat,Mat mpimat)
2873 {
2874   PetscErrorCode       ierr;
2875   MPI_Comm             comm=mpimat->comm;
2876   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
2877   PetscMPIInt          size,rank,taga,*len_s;
2878   PetscInt             N=mpimat->N,i,j,*owners,*ai=a->i,*aj=a->j;
2879   PetscInt             proc,m;
2880   PetscInt             **buf_ri,**buf_rj;
2881   PetscInt             k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
2882   PetscInt             nrows,**buf_ri_k,**nextrow,**nextai;
2883   MPI_Request          *s_waits,*r_waits;
2884   MPI_Status           *status;
2885   MatScalar            *aa=a->a,**abuf_r,*ba_i;
2886   Mat_Merge_SeqsToMPI  *merge;
2887   PetscObjectContainer container;
2888 
2889   PetscFunctionBegin;
2890   if (!logkey_seqstompinum) {
2891     ierr = PetscLogEventRegister(&logkey_seqstompinum,"MatMerge_SeqsToMPINumeric",MAT_COOKIE);CHKERRQ(ierr);
2892   }
2893   ierr = PetscLogEventBegin(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2894 
2895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2896   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2897 
2898   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject *)&container);CHKERRQ(ierr);
2899   if (!container) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"mpimat must first be created by MatMerge_SeqsToMPISymbolic()");
2900   ierr   = PetscObjectContainerGetPointer(container,(void **)&merge);CHKERRQ(ierr);
2902   bi     = merge->bi;
2903   bj     = merge->bj;
2904   buf_ri = merge->buf_ri;
2905   buf_rj = merge->buf_rj;
2906 
2907   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
2908   ierr   = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
2909   len_s  = merge->len_s;
2910 
2911   /* send and recv matrix values */
2912   /*-----------------------------*/
2913   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&taga);CHKERRQ(ierr);
2914   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
2915 
2916   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
2917   for (proc=0,k=0; proc<size; proc++){
2918     if (!len_s[proc]) continue;
2919     i = owners[proc];
2920     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
2921     k++;
2922   }
2923 
2924   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
2925   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
2926   ierr = PetscFree(status);CHKERRQ(ierr);
2927 
2928   ierr = PetscFree(s_waits);CHKERRQ(ierr);
2929   ierr = PetscFree(r_waits);CHKERRQ(ierr);
2930 
2931   /* insert mat values of mpimat */
2932   /*----------------------------*/
2933   ierr = PetscMalloc(N*sizeof(MatScalar),&ba_i);CHKERRQ(ierr);
2934   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
2935   nextrow = buf_ri_k + merge->nrecv;
2936   nextai  = nextrow + merge->nrecv;
2937 
2938   for (k=0; k<merge->nrecv; k++){
2939     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
2940     nrows = *(buf_ri_k[k]);
2941     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
2942     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
2943   }
2944 
2945   /* set values of ba */
2946   ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr);
2947   for (i=0; i<m; i++) {
2948     arow = owners[rank] + i;
2949     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
2950     bnzi = bi[i+1] - bi[i];
2951     ierr = PetscMemzero(ba_i,bnzi*sizeof(MatScalar));CHKERRQ(ierr);
2952 
2953     /* add local non-zero vals of this proc's seqmat into ba */
2954     anzi = ai[arow+1] - ai[arow];
2955     aj   = a->j + ai[arow];
2956     aa   = a->a + ai[arow];
2957     nextaj = 0;
2958     for (j=0; nextaj<anzi; j++){
2959       if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2960         ba_i[j] += aa[nextaj++];
2961       }
2962     }
2963 
2964     /* add received vals into ba */
2965     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
2966       /* i-th row */
2967       if (i == *nextrow[k]) {
2968         anzi = *(nextai[k]+1) - *nextai[k];
2969         aj   = buf_rj[k] + *(nextai[k]);
2970         aa   = abuf_r[k] + *(nextai[k]);
2971         nextaj = 0;
2972         for (j=0; nextaj<anzi; j++){
2973           if (*(bj_i + j) == aj[nextaj]){ /* bcol == acol */
2974             ba_i[j] += aa[nextaj++];
2975           }
2976         }
2977         nextrow[k]++; nextai[k]++;
2978       }
2979     }
2980     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
2981   }
2982   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984 
2985   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
2986   ierr = PetscFree(ba_i);CHKERRQ(ierr);
2987   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
2988   ierr = PetscLogEventEnd(logkey_seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
2989   PetscFunctionReturn(0);
2990 }
2991 
2992 static PetscEvent logkey_seqstompisym = 0;
2993 #undef __FUNCT__
2994 #define __FUNCT__ "MatMerge_SeqsToMPISymbolic"
2995 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPISymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
2996 {
2997   PetscErrorCode       ierr;
2998   Mat                  B_mpi;
2999   Mat_SeqAIJ           *a=(Mat_SeqAIJ*)seqmat->data;
3000   PetscMPIInt          size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
3001   PetscInt             **buf_rj,**buf_ri,**buf_ri_k;
3002   PetscInt             M=seqmat->m,N=seqmat->n,i,*owners,*ai=a->i,*aj=a->j;
3003   PetscInt             len,proc,*dnz,*onz;
3004   PetscInt             k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
3005   PetscInt             nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
3006   MPI_Request          *si_waits,*sj_waits,*ri_waits,*rj_waits;
3007   MPI_Status           *status;
3008   FreeSpaceList        free_space=PETSC_NULL,current_space=PETSC_NULL;
3009   PetscBT              lnkbt;
3010   Mat_Merge_SeqsToMPI  *merge;
3011   PetscObjectContainer container;
3012 
3013   PetscFunctionBegin;
3014   if (!logkey_seqstompisym) {
3015     ierr = PetscLogEventRegister(&logkey_seqstompisym,"MatMerge_SeqsToMPISymbolic",MAT_COOKIE);CHKERRQ(ierr);
3016   }
3017   ierr = PetscLogEventBegin(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3018 
3019   /* make sure it is a PETSc comm */
3020   ierr = PetscCommDuplicate(comm,&comm,PETSC_NULL);CHKERRQ(ierr);
3021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3022   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3023 
3024   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
3025   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
3026 
3027   /* determine row ownership */
3028   /*---------------------------------------------------------*/
3029   ierr = PetscMapCreate(comm,&merge->rowmap);CHKERRQ(ierr);
3030   if (m == PETSC_DECIDE) {
3031     ierr = PetscMapSetSize(merge->rowmap,M);CHKERRQ(ierr);
3032   } else {
3033     ierr = PetscMapSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
3034   }
3035   ierr = PetscMapSetType(merge->rowmap,MAP_MPI);CHKERRQ(ierr);
3036   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
3037   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
3038 
3039   if (m == PETSC_DECIDE) {ierr = PetscMapGetLocalSize(merge->rowmap,&m);CHKERRQ(ierr); }
3040   ierr = PetscMapGetGlobalRange(merge->rowmap,&owners);CHKERRQ(ierr);
3041 
3042   /* determine the number of messages to send, their lengths */
3043   /*---------------------------------------------------------*/
3044   len_s  = merge->len_s;
3045 
3046   len = 0;  /* length of buf_si[] */
3047   merge->nsend = 0;
3048   for (proc=0; proc<size; proc++){
3049     len_si[proc] = 0;
3050     if (proc == rank){
3051       len_s[proc] = 0;
3052     } else {
3053       len_si[proc] = owners[proc+1] - owners[proc] + 1;
3054       len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
3055     }
3056     if (len_s[proc]) {
3057       merge->nsend++;
3058       nrows = 0;
3059       for (i=owners[proc]; i<owners[proc+1]; i++){
3060         if (ai[i+1] > ai[i]) nrows++;
3061       }
3062       len_si[proc] = 2*(nrows+1);
3063       len += len_si[proc];
3064     }
3065   }
3066 
3067   /* determine the number and length of messages to receive for ij-structure */
3068   /*-------------------------------------------------------------------------*/
3069   ierr = PetscGatherNumberOfMessages(comm,PETSC_NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
3070   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
3071 
3072   /* post the Irecv of j-structure */
3073   /*-------------------------------*/
3074   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagj);CHKERRQ(ierr);
3075   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
3076 
3077   /* post the Isend of j-structure */
3078   /*--------------------------------*/
3079   ierr = PetscMalloc((2*merge->nsend+1)*sizeof(MPI_Request),&si_waits);CHKERRQ(ierr);
3080   sj_waits = si_waits + merge->nsend;
3081 
3082   for (proc=0, k=0; proc<size; proc++){
3083     if (!len_s[proc]) continue;
3084     i = owners[proc];
3085     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
3086     k++;
3087   }
3088 
3089   /* receives and sends of j-structure are complete */
3090   /*------------------------------------------------*/
3091   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
3092   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
3093 
3094   /* send and recv i-structure */
3095   /*---------------------------*/
3096   ierr = PetscObjectGetNewTag((PetscObject)merge->rowmap,&tagi);CHKERRQ(ierr);
3097   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
3098 
3099   ierr = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
3100   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
3101   for (proc=0,k=0; proc<size; proc++){
3102     if (!len_s[proc]) continue;
3103     /* form outgoing message for i-structure:
3104          buf_si[0]:                 nrows to be sent
3105                [1:nrows]:           row index (global)
3106                [nrows+1:2*nrows+1]: i-structure index
3107     */
3108     /*-------------------------------------------*/
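    /* Worked example of the layout above (illustrative numbers): if the message for [proc]
       covers two nonempty rows with local indices 2 and 5 holding 3 and 4 nonzeros, then
       nrows = 2, len_si[proc] = 2*(2+1) = 6 and
           buf_si = { 2, 2, 5, 0, 3, 7 }
       so the receiver recovers both the row numbers and the offsets of each row within the
       separately sent j- (and later a-) arrays. */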
3109     nrows = len_si[proc]/2 - 1;
3110     buf_si_i    = buf_si + nrows+1;
3111     buf_si[0]   = nrows;
3112     buf_si_i[0] = 0;
3113     nrows = 0;
3114     for (i=owners[proc]; i<owners[proc+1]; i++){
3115       anzi = ai[i+1] - ai[i];
3116       if (anzi) {
3117         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
3118         buf_si[nrows+1] = i-owners[proc]; /* local row index */
3119         nrows++;
3120       }
3121     }
3122     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
3123     k++;
3124     buf_si += len_si[proc];
3125   }
3126 
3127   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
3128   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
3129 
3130   ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI: nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv));CHKERRQ(ierr);
3131   for (i=0; i<merge->nrecv; i++){
3132     ierr = PetscLogInfo(((PetscObject)(seqmat),"MatMerge_SeqsToMPI:   recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]));CHKERRQ(ierr);
3133   }
3134 
3135   ierr = PetscFree(len_si);CHKERRQ(ierr);
3136   ierr = PetscFree(len_ri);CHKERRQ(ierr);
3137   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
3138   ierr = PetscFree(si_waits);CHKERRQ(ierr);
3139   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
3140   ierr = PetscFree(buf_s);CHKERRQ(ierr);
3141   ierr = PetscFree(status);CHKERRQ(ierr);
3142 
3143   /* compute a local seq matrix in each processor */
3144   /*----------------------------------------------*/
3145   /* allocate bi array and free space for accumulating nonzero column info */
3146   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
3147   bi[0] = 0;
3148 
3149   /* create and initialize a linked list */
3150   nlnk = N+1;
3151   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
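  /* The linked list lnk, backed by the bit table lnkbt, accumulates the union of the column
     indices contributed to one local row: PetscLLAdd() below merges a set of indices into the
     list, and PetscLLClean() copies the result out and resets the list for the next row. */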
3152 
3153   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
3155   len  = ai[owners[rank+1]] - ai[owners[rank]];
3156   ierr = GetMoreSpace((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
3157   current_space = free_space;
3158 
3159   /* determine symbolic info for each local row */
3160   ierr = PetscMalloc((3*merge->nrecv+1)*sizeof(PetscInt**),&buf_ri_k);CHKERRQ(ierr);
3161   nextrow = buf_ri_k + merge->nrecv;
3162   nextai  = nextrow + merge->nrecv;
3163   for (k=0; k<merge->nrecv; k++){
3164     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
3165     nrows = *buf_ri_k[k];
3166     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
3167     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
3168   }
3169 
3170   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
3171   len = 0;
3172   for (i=0;i<m;i++) {
3173     bnzi   = 0;
3174     /* add local non-zero cols of this proc's seqmat into lnk */
3175     arow   = owners[rank] + i;
3176     anzi   = ai[arow+1] - ai[arow];
3177     aj     = a->j + ai[arow];
3178     ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3179     bnzi += nlnk;
3180     /* add received col data into lnk */
3181     for (k=0; k<merge->nrecv; k++){ /* k-th received message */
3182       if (i == *nextrow[k]) { /* i-th row */
3183         anzi = *(nextai[k]+1) - *nextai[k];
3184         aj   = buf_rj[k] + *nextai[k];
3185         ierr = PetscLLAdd(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
3186         bnzi += nlnk;
3187         nextrow[k]++; nextai[k]++;
3188       }
3189     }
3190     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
3191 
3192     /* if free space is not available, make more free space */
3193     if (current_space->local_remaining<bnzi) {
3194       ierr = GetMoreSpace(current_space->total_array_size,&current_space);CHKERRQ(ierr);
3195       nspacedouble++;
3196     }
3197     /* copy data into free space, then initialize lnk */
3198     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
3199     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
3200 
3201     current_space->array           += bnzi;
3202     current_space->local_used      += bnzi;
3203     current_space->local_remaining -= bnzi;
3204 
3205     bi[i+1] = bi[i] + bnzi;
3206   }
3207 
3208   ierr = PetscFree(buf_ri_k);CHKERRQ(ierr);
3209 
3210   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
3211   ierr = MakeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
3212   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
3213 
3214   /* create symbolic parallel matrix B_mpi */
3215   /*---------------------------------------*/
3216   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
3217   if (n==PETSC_DECIDE) {
3218     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
3219   } else {
3220     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3221   }
3222   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
3223   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
3224   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3225 
3226   /* B_mpi is not ready for use - assembly will be done by MatMerge_SeqsToMPINumeric() */
3227   B_mpi->assembled     = PETSC_FALSE;
3228   B_mpi->ops->destroy  = MatDestroy_MPIAIJ_SeqsToMPI;
3229   merge->bi            = bi;
3230   merge->bj            = bj;
3231   merge->buf_ri        = buf_ri;
3232   merge->buf_rj        = buf_rj;
3233   merge->coi           = PETSC_NULL;
3234   merge->coj           = PETSC_NULL;
3235   merge->owners_co     = PETSC_NULL;
3236 
3237   /* attach the supporting struct to B_mpi for reuse */
3238   ierr = PetscObjectContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3239   ierr = PetscObjectContainerSetPointer(container,merge);CHKERRQ(ierr);
3240   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
3241   *mpimat = B_mpi;
3242 
3243   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
3244   ierr = PetscLogEventEnd(logkey_seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 static PetscEvent logkey_seqstompi = 0;
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatMerge_SeqsToMPI"
3251 PetscErrorCode PETSCMAT_DLLEXPORT MatMerge_SeqsToMPI(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
3252 {
3253   PetscErrorCode   ierr;
3254 
3255   PetscFunctionBegin;
3256   if (!logkey_seqstompi) {
3257     ierr = PetscLogEventRegister(&logkey_seqstompi,"MatMerge_SeqsToMPI",MAT_COOKIE);CHKERRQ(ierr);
3258   }
3259   ierr = PetscLogEventBegin(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3260   if (scall == MAT_INITIAL_MATRIX){
3261     ierr = MatMerge_SeqsToMPISymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
3262   }
3263   ierr = MatMerge_SeqsToMPINumeric(seqmat,*mpimat);CHKERRQ(ierr);
3264   ierr = PetscLogEventEnd(logkey_seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 static PetscEvent logkey_getlocalmat = 0;
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatGetLocalMat"
3270 /*@C
3271      MatGetLocalMat - Creates a SeqAIJ matrix from all the local rows of A (the diagonal and off-diagonal parts merged)
3272 
3273     Not Collective
3274 
3275    Input Parameters:
3276 +    A - the matrix
3277 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3278 
3279    Output Parameter:
3280 .    A_loc - the local sequential matrix generated
3281 
3282     Level: developer
3283 
3284 @*/
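/*
   Minimal usage sketch (A is an assembled MPIAIJ matrix created elsewhere; the
   MAT_REUSE_MATRIX call only refreshes the numerical values, so the nonzero
   pattern of A must be unchanged between calls):

      Mat A_loc;
      ierr = MatGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ... use the local rows of A through A_loc ...
      ierr = MatGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/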
3285 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
3286 {
3287   PetscErrorCode  ierr;
3288   Mat_MPIAIJ      *mpimat=(Mat_MPIAIJ*)A->data;
3289   Mat_SeqAIJ      *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
3290   PetscInt        *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
3291   PetscScalar     *aa=a->a,*ba=b->a,*ca;
3292   PetscInt        am=A->m,i,j,k,cstart=mpimat->cstart;
3293   PetscInt        *ci,*cj,col,ncols_d,ncols_o,jo;
3294 
3295   PetscFunctionBegin;
3296   if (!logkey_getlocalmat) {
3297     ierr = PetscLogEventRegister(&logkey_getlocalmat,"MatGetLocalMat",MAT_COOKIE);CHKERRQ(ierr);
3298   }
3299   ierr = PetscLogEventBegin(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3300   if (scall == MAT_INITIAL_MATRIX){
3301     ierr = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
3302     ci[0] = 0;
3303     for (i=0; i<am; i++){
3304       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
3305     }
3306     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
3307     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
3308     k = 0;
3309     for (i=0; i<am; i++) {
3310       ncols_o = bi[i+1] - bi[i];
3311       ncols_d = ai[i+1] - ai[i];
3312       /* off-diagonal portion of A */
3313       for (jo=0; jo<ncols_o; jo++) {
3314         col = cmap[*bj];
3315         if (col >= cstart) break;
3316         cj[k]   = col; bj++;
3317         ca[k++] = *ba++;
3318       }
3319       /* diagonal portion of A */
3320       for (j=0; j<ncols_d; j++) {
3321         cj[k]   = cstart + *aj++;
3322         ca[k++] = *aa++;
3323       }
3324       /* off-diagonal portion of A */
3325       for (j=jo; j<ncols_o; j++) {
3326         cj[k]   = cmap[*bj++];
3327         ca[k++] = *ba++;
3328       }
3329     }
3330     /* put together the new matrix */
3331     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
3332     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3333     /* Since these are PETSc arrays, change flags to free them as necessary. */
3334     mat = (Mat_SeqAIJ*)(*A_loc)->data;
3335     mat->freedata = PETSC_TRUE;
3336     mat->nonew    = 0;
3337   } else if (scall == MAT_REUSE_MATRIX){
3338     mat=(Mat_SeqAIJ*)(*A_loc)->data;
3339     ci = mat->i; cj = mat->j; ca = mat->a;
3340     for (i=0; i<am; i++) {
3341       /* off-diagonal portion of A */
3342       ncols_o = bi[i+1] - bi[i];
3343       for (jo=0; jo<ncols_o; jo++) {
3344         col = cmap[*bj];
3345         if (col >= cstart) break;
3346         *ca++ = *ba++; bj++;
3347       }
3348       /* diagonal portion of A */
3349       ncols_d = ai[i+1] - ai[i];
3350       for (j=0; j<ncols_d; j++) *ca++ = *aa++;
3351       /* off-diagonal portion of A */
3352       for (j=jo; j<ncols_o; j++) {
3353         *ca++ = *ba++; bj++;
3354       }
3355     }
3356   } else {
3357     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3358   }
3359 
3360   ierr = PetscLogEventEnd(logkey_getlocalmat,A,0,0,0);CHKERRQ(ierr);
3361   PetscFunctionReturn(0);
3362 }
3363 
3364 static PetscEvent logkey_getlocalmatcondensed = 0;
3365 #undef __FUNCT__
3366 #define __FUNCT__ "MatGetLocalMatCondensed"
3367 /*@C
3368      MatGetLocalMatCondensed - Creates a SeqAIJ matrix from all the local rows of A and only its NON-ZERO columns
3369 
3370     Not Collective
3371 
3372    Input Parameters:
3373 +    A - the matrix
3374 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3375 -    row, col - index sets of rows and columns to extract (or PETSC_NULL)
3376 
3377    Output Parameter:
3378 .    A_loc - the local sequential matrix generated
3379 
3380     Level: developer
3381 
3382 @*/
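/*
   Minimal usage sketch (A is an assembled MPIAIJ matrix created elsewhere; passing
   PETSC_NULL for row and col lets the routine build the default index sets of all
   local rows and all locally nonzero columns):

      Mat A_loc;
      ierr = MatGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,PETSC_NULL,PETSC_NULL,&A_loc);CHKERRQ(ierr);
      ... use A_loc ...
      ierr = MatDestroy(A_loc);CHKERRQ(ierr);
*/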
3383 PetscErrorCode PETSCMAT_DLLEXPORT MatGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
3384 {
3385   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data;
3386   PetscErrorCode    ierr;
3387   PetscInt          i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
3388   IS                isrowa,iscola;
3389   Mat               *aloc;
3390 
3391   PetscFunctionBegin;
3392   if (!logkey_getlocalmatcondensed) {
3393     ierr = PetscLogEventRegister(&logkey_getlocalmatcondensed,"MatGetLocalMatCondensed",MAT_COOKIE);CHKERRQ(ierr);
3394   }
3395   ierr = PetscLogEventBegin(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3396   if (!row){
3397     start = a->rstart; end = a->rend;
3398     ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
3399   } else {
3400     isrowa = *row;
3401   }
3402   if (!col){
3403     start = a->cstart;
3404     cmap  = a->garray;
3405     nzA   = a->A->n;
3406     nzB   = a->B->n;
3407     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3408     ncols = 0;
3409     for (i=0; i<nzB; i++) {
3410       if (cmap[i] < start) idx[ncols++] = cmap[i];
3411       else break;
3412     }
3413     imark = i;
3414     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
3415     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
3416     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&iscola);CHKERRQ(ierr);
3417     ierr = PetscFree(idx);CHKERRQ(ierr);
3418   } else {
3419     iscola = *col;
3420   }
3421   if (scall != MAT_INITIAL_MATRIX){
3422     ierr = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
3423     aloc[0] = *A_loc;
3424   }
3425   ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
3426   *A_loc = aloc[0];
3427   ierr = PetscFree(aloc);CHKERRQ(ierr);
3428   if (!row){
3429     ierr = ISDestroy(isrowa);CHKERRQ(ierr);
3430   }
3431   if (!col){
3432     ierr = ISDestroy(iscola);CHKERRQ(ierr);
3433   }
3434   ierr = PetscLogEventEnd(logkey_getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 static PetscEvent logkey_GetBrowsOfAcols = 0;
3439 #undef __FUNCT__
3440 #define __FUNCT__ "MatGetBrowsOfAcols"
3441 /*@C
3442     MatGetBrowsOfAcols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns of the local part of A
3443 
3444     Collective on Mat
3445 
3446    Input Parameters:
3447 +    A,B - the matrices in mpiaij format
3448 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3449 -    rowb, colb - index sets of rows and columns of B to extract (or PETSC_NULL)
3450 
3451    Output Parameter:
3452 +    rowb, colb - index sets of rows and columns of B to extract
3453 .    brstart - row index in B_seq at which B's own local rows begin
3454 -    B_seq - the sequential matrix generated
3455 
3456     Level: developer
3457 
3458 @*/
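/*
   Minimal usage sketch (A and B are assembled MPIAIJ matrices with compatible layouts,
   created elsewhere; the index sets returned by the first call are reused by the second):

      IS       rowb,colb;
      PetscInt brstart;
      Mat      B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ... use B_seq; after the numerical values of B change ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&brstart,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(rowb);CHKERRQ(ierr);
      ierr = ISDestroy(colb);CHKERRQ(ierr);
      ierr = MatDestroy(B_seq);CHKERRQ(ierr);
*/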
3459 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,PetscInt *brstart,Mat *B_seq)
3460 {
3461   Mat_MPIAIJ        *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3462   PetscErrorCode    ierr;
3463   PetscInt          *idx,i,start,ncols,nzA,nzB,*cmap,imark;
3464   IS                isrowb,iscolb;
3465   Mat               *bseq;
3466 
3467   PetscFunctionBegin;
3468   if (a->cstart != b->rstart || a->cend != b->rend){
3469     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",a->cstart,a->cend,b->rstart,b->rend);
3470   }
3471   if (!logkey_GetBrowsOfAcols) {
3472     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAcols,"MatGetBrowsOfAcols",MAT_COOKIE);CHKERRQ(ierr);
3473   }
3474   ierr = PetscLogEventBegin(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3475 
3476   if (scall == MAT_INITIAL_MATRIX){
3477     start = a->cstart;
3478     cmap  = a->garray;
3479     nzA   = a->A->n;
3480     nzB   = a->B->n;
3481     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
3482     ncols = 0;
3483     for (i=0; i<nzB; i++) {  /* row < local row index */
3484       if (cmap[i] < start) idx[ncols++] = cmap[i];
3485       else break;
3486     }
3487     imark = i;
3488     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
3489     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
3490     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,&isrowb);CHKERRQ(ierr);
3491     ierr = PetscFree(idx);CHKERRQ(ierr);
3492     *brstart = imark;
3493     ierr = ISCreateStride(PETSC_COMM_SELF,B->N,0,1,&iscolb);CHKERRQ(ierr);
3494   } else {
3495     if (!rowb || !colb) SETERRQ(PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
3496     isrowb = *rowb; iscolb = *colb;
3497     ierr = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
3498     bseq[0] = *B_seq;
3499   }
3500   ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
3501   *B_seq = bseq[0];
3502   ierr = PetscFree(bseq);CHKERRQ(ierr);
3503   if (!rowb){
3504     ierr = ISDestroy(isrowb);CHKERRQ(ierr);
3505   } else {
3506     *rowb = isrowb;
3507   }
3508   if (!colb){
3509     ierr = ISDestroy(iscolb);CHKERRQ(ierr);
3510   } else {
3511     *colb = iscolb;
3512   }
3513   ierr = PetscLogEventEnd(logkey_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
3514   PetscFunctionReturn(0);
3515 }
3516 
3517 static PetscEvent logkey_GetBrowsOfAocols = 0;
3518 #undef __FUNCT__
3519 #define __FUNCT__ "MatGetBrowsOfAoCols"
3520 /*@C
3521     MatGetBrowsOfAoCols - Creates a SeqAIJ matrix from the rows of B that correspond to the nonzero columns
3522     of the OFF-DIAGONAL portion of the local part of A
3523 
3524     Collective on Mat
3525 
3526    Input Parameters:
3527 +    A,B - the matrices in mpiaij format
3528 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
3529 .    startsj - starting point in B's sending and receiving j-arrays, saved for MAT_REUSE (or PETSC_NULL)
3530 -    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or PETSC_NULL)
3531 
3532    Output Parameter:
3533 .    B_oth - the sequential matrix generated
3534 
3535     Level: developer
3536 
3537 @*/
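/*
   Minimal usage sketch (A and B are assembled MPIAIJ matrices with compatible layouts;
   keeping startsj and bufa between calls lets a MAT_REUSE_MATRIX call exchange only the
   new numerical values):

      PetscInt    *startsj = PETSC_NULL;
      PetscScalar *bufa    = PETSC_NULL;
      Mat         B_oth;
      ierr = MatGetBrowsOfAoCols(A,B,MAT_INITIAL_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ... after the numerical values of B change ...
      ierr = MatGetBrowsOfAoCols(A,B,MAT_REUSE_MATRIX,&startsj,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree(startsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(B_oth);CHKERRQ(ierr);
*/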
3538 PetscErrorCode PETSCMAT_DLLEXPORT MatGetBrowsOfAoCols(Mat A,Mat B,MatReuse scall,PetscInt **startsj,PetscScalar **bufa_ptr,Mat *B_oth)
3539 {
3540   VecScatter_MPI_General *gen_to,*gen_from;
3541   PetscErrorCode         ierr;
3542   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data,*b=(Mat_MPIAIJ*)B->data;
3543   Mat_SeqAIJ             *b_oth;
3544   VecScatter             ctx=a->Mvctx;
3545   MPI_Comm               comm=ctx->comm;
3546   PetscMPIInt            *rprocs,*sprocs,tag=ctx->tag,rank;
3547   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->n,row,*b_othi,*b_othj;
3548   PetscScalar            *rvalues,*svalues,*b_otha,*bufa,*bufA;
3549   PetscInt               i,k,l,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
3550   MPI_Request            *rwaits,*swaits;
3551   MPI_Status             *sstatus,rstatus;
3552   PetscInt               *cols;
3553   PetscScalar            *vals;
3554   PetscMPIInt            j;
3555 
3556   PetscFunctionBegin;
3557   if (a->cstart != b->rstart || a->cend != b->rend){
3558     SETERRQ4(PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",a->cstart,a->cend,b->rstart,b->rend);
3559   }
3560   if (!logkey_GetBrowsOfAocols) {
3561     ierr = PetscLogEventRegister(&logkey_GetBrowsOfAocols,"MatGetBrAoCol",MAT_COOKIE);CHKERRQ(ierr);
3562   }
3563   ierr = PetscLogEventBegin(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3564   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3565 
3566   gen_to   = (VecScatter_MPI_General*)ctx->todata;
3567   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
3568   rvalues  = gen_from->values; /* receive buffer: holds the lengths of the incoming rows */
3569   svalues  = gen_to->values;   /* send buffer: holds the lengths of the rows to be sent */
3570   nrecvs   = gen_from->n;
3571   nsends   = gen_to->n;
3572   rwaits   = gen_from->requests;
3573   swaits   = gen_to->requests;
3574   srow     = gen_to->indices;   /* local row index to be sent */
3575   rstarts  = gen_from->starts;
3576   sstarts  = gen_to->starts;
3577   rprocs   = gen_from->procs;
3578   sprocs   = gen_to->procs;
3579   sstatus  = gen_to->sstatus;
3580 
3581   if (!startsj || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
3582   if (scall == MAT_INITIAL_MATRIX){
3583     /* i-array */
3584     /*---------*/
3585     /*  post receives */
3586     for (i=0; i<nrecvs; i++){
3587       rowlen = (PetscInt*)rvalues + rstarts[i];
3588       nrows = rstarts[i+1]-rstarts[i];
3589       ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3590     }
3591 
3592     /* pack the outgoing message */
3593     ierr = PetscMalloc((nsends+nrecvs+3)*sizeof(PetscInt),&sstartsj);CHKERRQ(ierr);
3594     rstartsj = sstartsj + nsends +1;
3595     sstartsj[0] = 0;  rstartsj[0] = 0;
3596     len = 0; /* total length of j or a array to be sent */
3597     k = 0;
3598     for (i=0; i<nsends; i++){
3599       rowlen = (PetscInt*)svalues + sstarts[i];
3600       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3601       for (j=0; j<nrows; j++) {
3602         row = srow[k] + b->rowners[rank]; /* global row idx */
3603         ierr = MatGetRow_MPIAIJ(B,row,&rowlen[j],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr); /* rowlength */
3604         len += rowlen[j];
3605         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
3606         k++;
3607       }
3608       ierr = MPI_Isend(rowlen,nrows,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3609       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
3610     }
3611     /* recvs and sends of i-array are completed */
3612     i = nrecvs;
3613     while (i--) {
3614       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3615     }
3616     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3617     /* allocate buffers for sending j and a arrays */
3618     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
3619     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
3620 
3621     /* create i-array of B_oth */
3622     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
3623     b_othi[0] = 0;
3624     len = 0; /* total length of j or a array to be received */
3625     k = 0;
3626     for (i=0; i<nrecvs; i++){
3627       rowlen = (PetscInt*)rvalues + rstarts[i];
3628       nrows = rstarts[i+1]-rstarts[i];
3629       for (j=0; j<nrows; j++) {
3630         b_othi[k+1] = b_othi[k] + rowlen[j];
3631         len += rowlen[j]; k++;
3632       }
3633       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
3634     }
3635 
3636     /* allocate space for j and a arrays of B_oth */
3637     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
3638     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscScalar),&b_otha);CHKERRQ(ierr);
3639 
3640     /* j-array */
3641     /*---------*/
3642     /*  post receives of j-array */
3643     for (i=0; i<nrecvs; i++){
3644       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3645       ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3646     }
3647     k = 0;
3648     for (i=0; i<nsends; i++){
3649       nrows = sstarts[i+1]-sstarts[i]; /* num of rows */
3650       bufJ = bufj+sstartsj[i];
3651       for (j=0; j<nrows; j++) {
3652         row  = srow[k++] + b->rowners[rank]; /* global row idx */
3653         ierr = MatGetRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3654         for (l=0; l<ncols; l++){
3655           *bufJ++ = cols[l];
3656         }
3657         ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,&cols,PETSC_NULL);CHKERRQ(ierr);
3658       }
3659       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3660     }
3661 
3662     /* recvs and sends of j-array are completed */
3663     i = nrecvs;
3664     while (i--) {
3665       ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3666     }
3667     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3668   } else if (scall == MAT_REUSE_MATRIX){
3669     sstartsj = *startsj;
3670     rstartsj = sstartsj + nsends +1;
3671     bufa     = *bufa_ptr;
3672     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
3673     b_otha   = b_oth->a;
3674   } else {
3675     SETERRQ1(PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
3676   }
3677 
3678   /* a-array */
3679   /*---------*/
3680   /*  post receives of a-array */
3681   for (i=0; i<nrecvs; i++){
3682     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
3683     ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
3684   }
3685   k = 0;
3686   for (i=0; i<nsends; i++){
3687     nrows = sstarts[i+1]-sstarts[i];
3688     bufA = bufa+sstartsj[i];
3689     for (j=0; j<nrows; j++) {
3690       row  = srow[k++] + b->rowners[rank]; /* global row idx */
3691       ierr = MatGetRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3692       for (l=0; l<ncols; l++){
3693         *bufA++ = vals[l];
3694       }
3695       ierr = MatRestoreRow_MPIAIJ(B,row,&ncols,PETSC_NULL,&vals);CHKERRQ(ierr);
3696 
3697     }
3698     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
3699   }
3700   /* recvs and sends of a-array are completed */
3701   i = nrecvs;
3702   while (i--) {
3703     ierr = MPI_Waitany(nrecvs,rwaits,&j,&rstatus);CHKERRQ(ierr);
3704   }
3705   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
3706 
3707   if (scall == MAT_INITIAL_MATRIX){
3708     /* put together the new matrix */
3709     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
3710 
3711     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
3712     /* Since these are PETSc arrays, change flags to free them as necessary. */
3713     b_oth = (Mat_SeqAIJ *)(*B_oth)->data;
3714     b_oth->freedata = PETSC_TRUE;
3715     b_oth->nonew    = 0;
3716 
3717     ierr = PetscFree(bufj);CHKERRQ(ierr);
3718     if (!startsj || !bufa_ptr){
3719       ierr = PetscFree(sstartsj);CHKERRQ(ierr);
3720       ierr = PetscFree(bufa);CHKERRQ(ierr);
3721     } else {
3722       *startsj  = sstartsj;
3723       *bufa_ptr = bufa;
3724     }
3725   }
3726   ierr = PetscLogEventEnd(logkey_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
3727 
3728   PetscFunctionReturn(0);
3729 }
3730 
3731 /*MC
3732    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
3733 
3734    Options Database Keys:
3735 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
3736 
3737   Level: beginner
3738 
3739 .seealso: MatCreateMPIAIJ
3740 M*/
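/*
   Minimal creation sketch for this matrix type (M, N and the preallocation counts are
   illustrative; real applications supply their own sizes and nonzero estimates):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
      ... insert values with MatSetValues(), then ...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Alternatively, replace the MatSetType() call with MatSetFromOptions() and select the
   type at run time with the -mat_type mpiaij option shown above.
*/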
3741 
3742 EXTERN_C_BEGIN
3743 #undef __FUNCT__
3744 #define __FUNCT__ "MatCreate_MPIAIJ"
3745 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIAIJ(Mat B)
3746 {
3747   Mat_MPIAIJ     *b;
3748   PetscErrorCode ierr;
3749   PetscInt       i;
3750   PetscMPIInt    size;
3751 
3752   PetscFunctionBegin;
3753   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
3754 
3755   ierr            = PetscNew(Mat_MPIAIJ,&b);CHKERRQ(ierr);
3756   B->data         = (void*)b;
3757   ierr            = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3758   B->factor       = 0;
3759   B->bs           = 1;
3760   B->assembled    = PETSC_FALSE;
3761   B->mapping      = 0;
3762 
3763   B->insertmode      = NOT_SET_VALUES;
3764   b->size            = size;
3765   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
3766 
3767   ierr = PetscSplitOwnership(B->comm,&B->m,&B->M);CHKERRQ(ierr);
3768   ierr = PetscSplitOwnership(B->comm,&B->n,&B->N);CHKERRQ(ierr);
3769 
3770   /* the information in the maps duplicates the information computed below, eventually
3771      we should remove the duplicate information that is not contained in the maps */
3772   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
3773   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
3774 
3775   /* build local table of row and column ownerships */
3776   ierr = PetscMalloc(2*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
3777   ierr = PetscLogObjectMemory(B,2*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIAIJ));CHKERRQ(ierr);
3778   b->cowners = b->rowners + b->size + 2;
3779   ierr = MPI_Allgather(&B->m,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3780   b->rowners[0] = 0;
3781   for (i=2; i<=b->size; i++) {
3782     b->rowners[i] += b->rowners[i-1];
3783   }
3784   b->rstart = b->rowners[b->rank];
3785   b->rend   = b->rowners[b->rank+1];
3786   ierr = MPI_Allgather(&B->n,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
3787   b->cowners[0] = 0;
3788   for (i=2; i<=b->size; i++) {
3789     b->cowners[i] += b->cowners[i-1];
3790   }
3791   b->cstart = b->cowners[b->rank];
3792   b->cend   = b->cowners[b->rank+1];
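  /* Worked example of the ownership tables (illustrative): with three processes owning
     4, 3 and 5 rows, MPI_Allgather() fills rowners[1..3] = {4,3,5}; after the prefix sum
     rowners = {0,4,7,12}, so rank 1 gets rstart = 4 and rend = 7.  The column table
     cowners is built the same way. */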
3793 
3794   /* build cache for off array entries formed */
3795   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
3796   b->donotstash  = PETSC_FALSE;
3797   b->colmap      = 0;
3798   b->garray      = 0;
3799   b->roworiented = PETSC_TRUE;
3800 
3801   /* stuff used for matrix vector multiply */
3802   b->lvec      = PETSC_NULL;
3803   b->Mvctx     = PETSC_NULL;
3804 
3805   /* stuff for MatGetRow() */
3806   b->rowindices   = 0;
3807   b->rowvalues    = 0;
3808   b->getrowactive = PETSC_FALSE;
3809 
3810   /* Explicitly create 2 MATSEQAIJ matrices. */
3811   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3812   ierr = MatSetSizes(b->A,B->m,B->n,B->m,B->n);CHKERRQ(ierr);
3813   ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3814   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3815   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3816   ierr = MatSetSizes(b->B,B->m,B->N,B->m,B->N);CHKERRQ(ierr);
3817   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3818   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3819 
3820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3821                                      "MatStoreValues_MPIAIJ",
3822                                      MatStoreValues_MPIAIJ);CHKERRQ(ierr);
3823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3824                                      "MatRetrieveValues_MPIAIJ",
3825                                      MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
3826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3827 				     "MatGetDiagonalBlock_MPIAIJ",
3828                                      MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
3829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3830 				     "MatIsTranspose_MPIAIJ",
3831 				     MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
3832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocation_C",
3833 				     "MatMPIAIJSetPreallocation_MPIAIJ",
3834 				     MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
3835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",
3836 				     "MatMPIAIJSetPreallocationCSR_MPIAIJ",
3837 				     MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
3838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3839 				     "MatDiagonalScaleLocal_MPIAIJ",
3840 				     MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
3841   PetscFunctionReturn(0);
3842 }
3843 EXTERN_C_END
3844