1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
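
   Example (a minimal creation sketch; the communicator comm, the Mat A, the local sizes m and n,
   and the preallocation counts d_nz and o_nz are assumed to be declared already; error checking omitted):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATAIJ);
   MatSeqAIJSetPreallocation(A,d_nz,NULL);           /* takes effect when comm has a single process */
   MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL); /* takes effect when comm has multiple processes */
.ve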
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use
20    inodes when enough consecutive rows with the same nonzero pattern exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
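/*
   MatFindNonzeroRows_MPIAIJ - builds an index set, in global numbering, of the locally owned
   rows that contain at least one stored nonzero value in either the diagonal (A) or the
   off-diagonal (B) block; *keptrows is left NULL when no process has an all-zero row.
*/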
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
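  /* work has global length n: each process accumulates the contributions of its own rows,
     using garray to map B's local column numbers back to global numbering, and the partial
     per-column results are combined across processes with MPI_Allreduce() below */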
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
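
    Here m is the number of rows (and columns) this process will own in the distributed result;
    the entries of gmat are read only on process 0, though gmat must be a valid matrix on every
    process since its block sizes are queried everywhere.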
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine the number of diagonal and off-diagonal entries in each row */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
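      /* ld[i] counts the entries of row i that lie to the left of this process's diagonal block;
         it is stashed in Mat_MPIAIJ->ld below so that the MAT_REUSE_MATRIX path can split each
         row's incoming values between the A and B blocks without needing the column indices */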
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine the number of diagonal and off-diagonal entries in each row */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0 */
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
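    /* within each row the incoming values are ordered: the ld[i] entries left of the diagonal
       block (destined for B), then the diagonal-block entries (destined for A), then the
       remaining off-diagonal entries (destined for B) */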
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array) but is fast to access.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
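/*
   The two macros below insert a single value into the diagonal (A) or off-diagonal (B)
   SeqAIJ block of the parallel matrix: a short binary search narrows the search window
   within the row, a linear scan locates the column, and when the column is not already
   present the later entries of the row are shifted up to make room (reallocating the row
   via MatSeqXAIJReallocateAIJ() if it is full).
*/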
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       A->nonzerostate++;\
406       a_noinsert: ; \
407       ailen[row] = nrow1; \
408 }
409 
410 
411 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
412   { \
413     if (col <= lastcol2) low2 = 0;                        \
414     else high2 = nrow2;                                   \
415     lastcol2 = col;                                       \
416     while (high2-low2 > 5) {                              \
417       t = (low2+high2)/2;                                 \
418       if (rp2[t] > col) high2 = t;                        \
419       else             low2  = t;                         \
420     }                                                     \
421     for (_i=low2; _i<high2; _i++) {                       \
422       if (rp2[_i] > col) break;                           \
423       if (rp2[_i] == col) {                               \
424         if (addv == ADD_VALUES) ap2[_i] += value;         \
425         else                    ap2[_i] = value;          \
426         goto b_noinsert;                                  \
427       }                                                   \
428     }                                                     \
429     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
430     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
431     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
432     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
433     N = nrow2++ - 1; b->nz++; high2++;                    \
434     /* shift up all the later entries in this row */      \
435     for (ii=N; ii>=_i; ii--) {                            \
436       rp2[ii+1] = rp2[ii];                                \
437       ap2[ii+1] = ap2[ii];                                \
438     }                                                     \
439     rp2[_i] = col;                                        \
440     ap2[_i] = value;                                      \
441     B->nonzerostate++;                                    \
442     b_noinsert: ;                                         \
443     bilen[row] = nrow2;                                   \
444   }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
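  /* v is assumed to hold the complete row ordered by increasing global column number, so it
     splits into the part of B left of the diagonal block, all of A, and the rest of B */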
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are either in INSERTMODE or ADDMODE */
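  /* the bitwise OR of the InsertMode values across processes equals ADD_VALUES|INSERT_VALUES
     exactly when some processes inserted while others added */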
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled; if so, we must
690      also disassemble ourselves, in order that we may reassemble. */
691   /*
692      if the nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled, thus we can skip this step
694   */
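  /* the MPI_PROD of the was_assembled flags is true only when every process was already
     assembled; a false result means some process has a fresh or disassembled B block, so any
     process that was assembled disassembles too, keeping the off-diagonal structure consistent */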
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscInt          n = A->rmap->n;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 
837   PetscFunctionBegin;
838   /* Create SF where leaves are input rows and roots are owned rows */
839   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
840   for (r = 0; r < n; ++r) lrows[r] = -1;
841   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
842   for (r = 0; r < N; ++r) {
843     const PetscInt idx   = rows[r];
844     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
845     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
846       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
847     }
848     rrows[r].rank  = p;
849     rrows[r].index = rows[r] - owners[p];
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
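  /* lmask now has a nonzero entry for every ghost column whose global index matches a zeroed
     row, i.e. it marks the columns of the off-diagonal block B that must be cleared below */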
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of the off-process part of the matrix, zeroing the removed columns */
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925 
926   /* only change matrix nonzero state if pattern was allowed to be changed */
927   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
928     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
929     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
930   }
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMult_MPIAIJ"
936 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
937 {
938   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       nt;
941 
942   PetscFunctionBegin;
943   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
944   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
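  /* overlap communication with computation: begin scattering the ghost values of xx, apply
     the diagonal block while the messages are in flight, then complete the scatter and add
     the off-diagonal contribution: yy = A_diag*xx + B_off*lvec */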
945   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
954 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
955 {
956   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
961   PetscFunctionReturn(0);
962 }
963 
964 #undef __FUNCT__
965 #define __FUNCT__ "MatMultAdd_MPIAIJ"
966 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
967 {
968   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
969   PetscErrorCode ierr;
970 
971   PetscFunctionBegin;
972   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
973   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
974   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
981 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
982 {
983   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
984   PetscErrorCode ierr;
985   PetscBool      merged;
986 
987   PetscFunctionBegin;
988   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
989   /* do nondiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   if (!merged) {
992     /* send it on its way */
993     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* receive remote parts: note this assumes the values are not actually */
997     /* added into yy until the VecScatterEnd() below */
998     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
999   } else {
1000     /* do local part */
1001     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
1005     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1006   }
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1012 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscMPIInt    size;
1021 
1022   PetscFunctionBegin;
1023   /* Easy test: symmetric diagonal block */
1024   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1025   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1026   if (!*f) PetscFunctionReturn(0);
1027   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1028   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1029   if (size == 1) PetscFunctionReturn(0);
1030 
1031   /* Hard test: off-diagonal block. This requires a call to MatGetSubMatrices(). */
1032   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1033   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1034   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1035   for (i=0; i<first; i++) notme[i] = i;
1036   for (i=last; i<M; i++) notme[i-last+first] = i;
1037   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1038   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1039   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1040   Aoff = Aoffs[0];
1041   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1042   Boff = Boffs[0];
1043   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1044   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1045   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1046   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1047   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1048   ierr = PetscFree(notme);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1054 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   /* do nondiagonal part */
1061   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1062   /* send it on its way */
1063   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1064   /* do local part */
1065   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1066   /* receive remote parts */
1067   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 /*
1072   This only works correctly for square matrices where the subblock A->A is the
1073    diagonal block
1074 */
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1077 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1078 {
1079   PetscErrorCode ierr;
1080   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1081 
1082   PetscFunctionBegin;
1083   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1084   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1085   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatScale_MPIAIJ"
1091 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1092 {
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1098   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatDestroy_Redundant"
1104 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1105 {
1106   PetscErrorCode ierr;
1107   Mat_Redundant  *redund = *redundant;
1108   PetscInt       i;
1109 
1110   PetscFunctionBegin;
1111   *redundant = NULL;
1112   if (redund){
1113     if (redund->matseq) { /* via MatGetSubMatrices()  */
1114       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1115       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1116       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1117       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1118     } else {
1119       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1120       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1121       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1122       for (i=0; i<redund->nrecvs; i++) {
1123         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1124         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1125       }
1126       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1127     }
1128 
1129     if (redund->psubcomm) {
1130       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1131     }
1132     ierr = PetscFree(redund);CHKERRQ(ierr);
1133   }
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatDestroy_MPIAIJ"
1139 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1140 {
1141   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1142   PetscErrorCode ierr;
1143 
1144   PetscFunctionBegin;
1145 #if defined(PETSC_USE_LOG)
1146   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1147 #endif
1148   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1149   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1150   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1151   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1152   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1153 #if defined(PETSC_USE_CTABLE)
1154   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1155 #else
1156   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1157 #endif
1158   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1159   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1160   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1161   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1162   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1163   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1164 
1165   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1167   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1169   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1173   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1174   PetscFunctionReturn(0);
1175 }
1176 
1177 #undef __FUNCT__
1178 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1179 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1180 {
1181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1182   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1183   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1184   PetscErrorCode ierr;
1185   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1186   int            fd;
1187   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1188   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1189   PetscScalar    *column_values;
1190   PetscInt       message_count,flowcontrolcount;
1191   FILE           *file;
1192 
1193   PetscFunctionBegin;
1194   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1195   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1196   nz   = A->nz + B->nz;
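  /* binary file layout: a four entry header (MAT_FILE_CLASSID, global row count, global column
     count, global nonzero count) followed by all row lengths, all column indices, and all values */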
1197   if (!rank) {
1198     header[0] = MAT_FILE_CLASSID;
1199     header[1] = mat->rmap->N;
1200     header[2] = mat->cmap->N;
1201 
1202     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1203     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1204     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1205     /* get largest number of rows any processor has */
1206     rlen  = mat->rmap->n;
1207     range = mat->rmap->range;
1208     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1209   } else {
1210     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1211     rlen = mat->rmap->n;
1212   }
1213 
1214   /* load up the local row counts */
1215   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1216   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1217 
1218   /* store the row lengths to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       rlen = range[i+1] - range[i];
1225       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1235 
1236   /* load up the local column indices */
1237   nzmax = nz; /* on process 0 this buffer must hold the largest number of nonzeros on any one process */
1238   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1240   cnt   = 0;
1241   for (i=0; i<mat->rmap->n; i++) {
1242     for (j=B->i[i]; j<B->i[i+1]; j++) {
1243       if ((col = garray[B->j[j]]) > cstart) break;
1244       column_indices[cnt++] = col;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1247     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1248   }
1249   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1250 
1251   /* store the column indices to the file */
1252   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1253   if (!rank) {
1254     MPI_Status status;
1255     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1256     for (i=1; i<size; i++) {
1257       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1258       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1259       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1260       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1261       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1262     }
1263     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1264   } else {
1265     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1266     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1268     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1269   }
1270   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1271 
1272   /* load up the local column values */
1273   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1274   cnt  = 0;
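  /* the values are written in the same merged (globally sorted) order as the
     column indices above */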
1275   for (i=0; i<mat->rmap->n; i++) {
1276     for (j=B->i[i]; j<B->i[i+1]; j++) {
1277       if (garray[B->j[j]] > cstart) break;
1278       column_values[cnt++] = B->a[j];
1279     }
1280     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1281     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1282   }
1283   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1284 
1285   /* store the column values to the file */
1286   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1287   if (!rank) {
1288     MPI_Status status;
1289     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1290     for (i=1; i<size; i++) {
1291       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1292       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1293       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1294       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1296     }
1297     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1298   } else {
1299     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1300     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1301     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1302     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1303   }
1304   ierr = PetscFree(column_values);CHKERRQ(ierr);
1305 
1306   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1307   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #include <petscdraw.h>
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1314 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1315 {
1316   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1317   PetscErrorCode    ierr;
1318   PetscMPIInt       rank = aij->rank,size = aij->size;
1319   PetscBool         isdraw,iascii,isbinary;
1320   PetscViewer       sviewer;
1321   PetscViewerFormat format;
1322 
1323   PetscFunctionBegin;
1324   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1325   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1327   if (iascii) {
1328     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1329     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1330       MatInfo   info;
1331       PetscInt  *inodes;
1332 
1333       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1334       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1335       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1336       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1337       if (!inodes) {
1338         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1339                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1340       } else {
1341         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1342                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1343       }
1344       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1346       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1347       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1348       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1349       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1350       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1351       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1352       PetscFunctionReturn(0);
1353     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1354       PetscInt inodecount,inodelimit,*inodes;
1355       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1356       if (inodes) {
1357         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1358       } else {
1359         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1360       }
1361       PetscFunctionReturn(0);
1362     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1363       PetscFunctionReturn(0);
1364     }
1365   } else if (isbinary) {
1366     if (size == 1) {
1367       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1368       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1369     } else {
1370       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1371     }
1372     PetscFunctionReturn(0);
1373   } else if (isdraw) {
1374     PetscDraw draw;
1375     PetscBool isnull;
1376     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1377     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1378   }
1379 
1380   {
1381     /* assemble the entire matrix onto the first processor */
1382     Mat        A;
1383     Mat_SeqAIJ *Aloc;
1384     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1385     MatScalar  *a;
1386     const char *matname;
1387 
1388     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1389     if (!rank) {
1390       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1391     } else {
1392       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1393     }
1394     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1395     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1396     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1397     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1398     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1399 
1400     /* copy over the A part */
1401     Aloc = (Mat_SeqAIJ*)aij->A->data;
1402     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1403     row  = mat->rmap->rstart;
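    /* temporarily shift the diagonal block's column indices to global numbering
       for MatSetValues(); they are shifted back once the rows have been inserted */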
1404     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1405     for (i=0; i<m; i++) {
1406       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1407       row++;
1408       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1409     }
1410     aj = Aloc->j;
1411     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1412 
1413     /* copy over the B part */
1414     Aloc = (Mat_SeqAIJ*)aij->B->data;
1415     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1416     row  = mat->rmap->rstart;
1417     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1418     ct   = cols;
1419     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1420     for (i=0; i<m; i++) {
1421       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1422       row++;
1423       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1424     }
1425     ierr = PetscFree(ct);CHKERRQ(ierr);
1426     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1427     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1428     /*
1429        Everyone has to participate in drawing the matrix, since the graphics
1430        waits are synchronized across all processes that share the PetscDraw object
1431     */
1432     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1433     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
1434     if (!rank) {
1435       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
1436       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1437     }
1438     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1439     ierr = MatDestroy(&A);CHKERRQ(ierr);
1440   }
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatView_MPIAIJ"
1446 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1447 {
1448   PetscErrorCode ierr;
1449   PetscBool      iascii,isdraw,issocket,isbinary;
1450 
1451   PetscFunctionBegin;
1452   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1453   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1456   if (iascii || isdraw || isbinary || issocket) {
1457     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1458   }
1459   PetscFunctionReturn(0);
1460 }
1461 
1462 #undef __FUNCT__
1463 #define __FUNCT__ "MatSOR_MPIAIJ"
1464 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1465 {
1466   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1467   PetscErrorCode ierr;
1468   Vec            bb1 = 0;
1469   PetscBool      hasop;
1470 
1471   PetscFunctionBegin;
1472   if (flag == SOR_APPLY_UPPER) {
1473     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1474     PetscFunctionReturn(0);
1475   }
1476 
1477   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1478     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1479   }
1480 
1481   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1482     if (flag & SOR_ZERO_INITIAL_GUESS) {
1483       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1484       its--;
1485     }
1486 
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1499     if (flag & SOR_ZERO_INITIAL_GUESS) {
1500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1501       its--;
1502     }
1503     while (its--) {
1504       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506 
1507       /* update rhs: bb1 = bb - B*x */
1508       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1509       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1510 
1511       /* local sweep */
1512       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1513     }
1514   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1515     if (flag & SOR_ZERO_INITIAL_GUESS) {
1516       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1517       its--;
1518     }
1519     while (its--) {
1520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1522 
1523       /* update rhs: bb1 = bb - B*x */
1524       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1525       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1526 
1527       /* local sweep */
1528       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1529     }
1530   } else if (flag & SOR_EISENSTAT) {
1531     Vec xx1;
1532 
1533     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1534     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1535 
1536     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1538     if (!mat->diag) {
1539       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1540       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1541     }
1542     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1543     if (hasop) {
1544       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1545     } else {
1546       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1547     }
1548     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
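    /* at this point bb1 = bb + ((omega-2)/omega) D x; the MatMultAdd() below adds
       the off-process contribution B x to complete the Eisenstat right-hand side */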
1549 
1550     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1551 
1552     /* local sweep */
1553     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1554     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1555     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1556   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1557 
1558   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatPermute_MPIAIJ"
1564 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1565 {
1566   Mat            aA,aB,Aperm;
1567   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1568   PetscScalar    *aa,*ba;
1569   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1570   PetscSF        rowsf,sf;
1571   IS             parcolp = NULL;
1572   PetscBool      done;
1573   PetscErrorCode ierr;
1574 
1575   PetscFunctionBegin;
1576   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1577   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1578   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1579   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1580 
1581   /* Invert row permutation to find out where my rows should go */
1582   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1583   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1584   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1585   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1586   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1587   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
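  /* rdest[i] now holds the global row of the permuted matrix to which local row i is sent */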
1588 
1589   /* Invert column permutation to find out where my columns should go */
1590   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1591   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1592   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1593   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1594   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1595   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1596   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1597 
1598   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1599   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1600   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1601 
1602   /* Find out where my gcols should go */
1603   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1604   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1605   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1606   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1607   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1608   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1609   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
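  /* gcdest[k] is the permuted global column index of ghost column k of the off-diagonal block */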
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1613   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1614   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1615   for (i=0; i<m; i++) {
1616     PetscInt row = rdest[i],rowner;
1617     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       PetscInt cowner,col = cdest[aj[j]];
1620       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1621       if (rowner == cowner) dnnz[i]++;
1622       else onnz[i]++;
1623     }
1624     for (j=bi[i]; j<bi[i+1]; j++) {
1625       PetscInt cowner,col = gcdest[bj[j]];
1626       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1627       if (rowner == cowner) dnnz[i]++;
1628       else onnz[i]++;
1629     }
1630   }
1631   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1632   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1633   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1634   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1636 
1637   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1638   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1639   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1642     PetscInt j0,rowlen;
1643     rowlen = ai[i+1] - ai[i];
1644     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could exceed m, the length of the repurposed arrays, so insert in batches */
1645       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1646       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1647     }
1648     rowlen = bi[i+1] - bi[i];
1649     for (j0=j=0; j<rowlen; j0=j) {
1650       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1651       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1652     }
1653   }
1654   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1655   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1656   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1657   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1658   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1659   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1660   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1661   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1662   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1663   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1664   *B = Aperm;
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1670 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1671 {
1672   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1673   Mat            A    = mat->A,B = mat->B;
1674   PetscErrorCode ierr;
1675   PetscReal      isend[5],irecv[5];
1676 
1677   PetscFunctionBegin;
1678   info->block_size = 1.0;
1679   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1680 
1681   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1682   isend[3] = info->memory;  isend[4] = info->mallocs;
1683 
1684   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1685 
1686   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1687   isend[3] += info->memory;  isend[4] += info->mallocs;
1688   if (flag == MAT_LOCAL) {
1689     info->nz_used      = isend[0];
1690     info->nz_allocated = isend[1];
1691     info->nz_unneeded  = isend[2];
1692     info->memory       = isend[3];
1693     info->mallocs      = isend[4];
1694   } else if (flag == MAT_GLOBAL_MAX) {
1695     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1696 
1697     info->nz_used      = irecv[0];
1698     info->nz_allocated = irecv[1];
1699     info->nz_unneeded  = irecv[2];
1700     info->memory       = irecv[3];
1701     info->mallocs      = irecv[4];
1702   } else if (flag == MAT_GLOBAL_SUM) {
1703     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1704 
1705     info->nz_used      = irecv[0];
1706     info->nz_allocated = irecv[1];
1707     info->nz_unneeded  = irecv[2];
1708     info->memory       = irecv[3];
1709     info->mallocs      = irecv[4];
1710   }
1711   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1712   info->fill_ratio_needed = 0;
1713   info->factor_mallocs    = 0;
1714   PetscFunctionReturn(0);
1715 }
1716 
1717 #undef __FUNCT__
1718 #define __FUNCT__ "MatSetOption_MPIAIJ"
1719 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1720 {
1721   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1722   PetscErrorCode ierr;
1723 
1724   PetscFunctionBegin;
1725   switch (op) {
1726   case MAT_NEW_NONZERO_LOCATIONS:
1727   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1728   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1729   case MAT_KEEP_NONZERO_PATTERN:
1730   case MAT_NEW_NONZERO_LOCATION_ERR:
1731   case MAT_USE_INODES:
1732   case MAT_IGNORE_ZERO_ENTRIES:
1733     MatCheckPreallocated(A,1);
1734     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1735     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1736     break;
1737   case MAT_ROW_ORIENTED:
1738     a->roworiented = flg;
1739 
1740     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1741     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1742     break;
1743   case MAT_NEW_DIAGONALS:
1744     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1745     break;
1746   case MAT_IGNORE_OFF_PROC_ENTRIES:
1747     a->donotstash = flg;
1748     break;
1749   case MAT_SPD:
1750     A->spd_set = PETSC_TRUE;
1751     A->spd     = flg;
1752     if (flg) {
1753       A->symmetric                  = PETSC_TRUE;
1754       A->structurally_symmetric     = PETSC_TRUE;
1755       A->symmetric_set              = PETSC_TRUE;
1756       A->structurally_symmetric_set = PETSC_TRUE;
1757     }
1758     break;
1759   case MAT_SYMMETRIC:
1760     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_STRUCTURALLY_SYMMETRIC:
1763     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1764     break;
1765   case MAT_HERMITIAN:
1766     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_SYMMETRY_ETERNAL:
1769     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1770     break;
1771   default:
1772     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1773   }
1774   PetscFunctionReturn(0);
1775 }
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatGetRow_MPIAIJ"
1779 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1780 {
1781   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1782   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1783   PetscErrorCode ierr;
1784   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1785   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1786   PetscInt       *cmap,*idx_p;
1787 
1788   PetscFunctionBegin;
1789   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1790   mat->getrowactive = PETSC_TRUE;
1791 
1792   if (!mat->rowvalues && (idx || v)) {
1793     /*
1794         allocate enough space to hold information from the longest row.
1795     */
1796     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1797     PetscInt   max = 1,tmp;
1798     for (i=0; i<matin->rmap->n; i++) {
1799       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1800       if (max < tmp) max = tmp;
1801     }
1802     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1803   }
1804 
1805   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1806   lrow = row - rstart;
1807 
1808   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1809   if (!v)   {pvA = 0; pvB = 0;}
1810   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1811   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1812   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1813   nztot = nzA + nzB;
1814 
1815   cmap = mat->garray;
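  /* cmap translates the off-diagonal block's local column indices into global column numbers */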
1816   if (v  || idx) {
1817     if (nztot) {
1818       /* Sort by increasing column numbers, assuming A and B already sorted */
1819       PetscInt imark = -1;
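      /* imark counts the off-diagonal entries whose global column precedes the
         diagonal block */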
1820       if (v) {
1821         *v = v_p = mat->rowvalues;
1822         for (i=0; i<nzB; i++) {
1823           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1824           else break;
1825         }
1826         imark = i;
1827         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1828         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1829       }
1830       if (idx) {
1831         *idx = idx_p = mat->rowindices;
1832         if (imark > -1) {
1833           for (i=0; i<imark; i++) {
1834             idx_p[i] = cmap[cworkB[i]];
1835           }
1836         } else {
1837           for (i=0; i<nzB; i++) {
1838             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1839             else break;
1840           }
1841           imark = i;
1842         }
1843         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1844         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1845       }
1846     } else {
1847       if (idx) *idx = 0;
1848       if (v)   *v   = 0;
1849     }
1850   }
1851   *nz  = nztot;
1852   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1853   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1859 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1860 {
1861   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1862 
1863   PetscFunctionBegin;
1864   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1865   aij->getrowactive = PETSC_FALSE;
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatNorm_MPIAIJ"
1871 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1872 {
1873   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1874   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1875   PetscErrorCode ierr;
1876   PetscInt       i,j,cstart = mat->cmap->rstart;
1877   PetscReal      sum = 0.0;
1878   MatScalar      *v;
1879 
1880   PetscFunctionBegin;
1881   if (aij->size == 1) {
1882     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1883   } else {
1884     if (type == NORM_FROBENIUS) {
1885       v = amat->a;
1886       for (i=0; i<amat->nz; i++) {
1887         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1888       }
1889       v = bmat->a;
1890       for (i=0; i<bmat->nz; i++) {
1891         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1892       }
1893       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1894       *norm = PetscSqrtReal(*norm);
1895     } else if (type == NORM_1) { /* max column norm */
1896       PetscReal *tmp,*tmp2;
1897       PetscInt  *jj,*garray = aij->garray;
1898       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1899       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1900       *norm = 0.0;
1901       v     = amat->a; jj = amat->j;
1902       for (j=0; j<amat->nz; j++) {
1903         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1904       }
1905       v = bmat->a; jj = bmat->j;
1906       for (j=0; j<bmat->nz; j++) {
1907         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1908       }
1909       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1910       for (j=0; j<mat->cmap->N; j++) {
1911         if (tmp2[j] > *norm) *norm = tmp2[j];
1912       }
1913       ierr = PetscFree(tmp);CHKERRQ(ierr);
1914       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1915     } else if (type == NORM_INFINITY) { /* max row norm */
1916       PetscReal ntemp = 0.0;
1917       for (j=0; j<aij->A->rmap->n; j++) {
1918         v   = amat->a + amat->i[j];
1919         sum = 0.0;
1920         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1921           sum += PetscAbsScalar(*v); v++;
1922         }
1923         v = bmat->a + bmat->i[j];
1924         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1925           sum += PetscAbsScalar(*v); v++;
1926         }
1927         if (sum > ntemp) ntemp = sum;
1928       }
1929       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1930     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatTranspose_MPIAIJ"
1937 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1938 {
1939   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1940   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1941   PetscErrorCode ierr;
1942   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1943   PetscInt       cstart = A->cmap->rstart,ncol;
1944   Mat            B;
1945   MatScalar      *array;
1946 
1947   PetscFunctionBegin;
1948   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1949 
1950   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1951   ai = Aloc->i; aj = Aloc->j;
1952   bi = Bloc->i; bj = Bloc->j;
1953   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1954     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1955     PetscSFNode          *oloc;
1956     PETSC_UNUSED PetscSF sf;
1957 
1958     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1959     /* compute d_nnz for preallocation */
1960     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1961     for (i=0; i<ai[ma]; i++) {
1962       d_nnz[aj[i]]++;
1963       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1964     }
1965     /* compute local off-diagonal contributions */
1966     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1967     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1968     /* map those to global */
1969     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1970     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1971     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1972     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1973     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1974     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1975     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1976 
1977     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1978     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1979     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1980     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1981     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1982     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1983   } else {
1984     B    = *matout;
1985     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1986     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1987   }
1988 
1989   /* copy over the A part */
1990   array = Aloc->a;
1991   row   = A->rmap->rstart;
1992   for (i=0; i<ma; i++) {
1993     ncol = ai[i+1]-ai[i];
1994     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1995     row++;
1996     array += ncol; aj += ncol;
1997   }
1998   aj = Aloc->j;
1999   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore local col indices */
2000 
2001   /* copy over the B part */
2002   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2003   array = Bloc->a;
2004   row   = A->rmap->rstart;
2005   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2006   cols_tmp = cols;
2007   for (i=0; i<mb; i++) {
2008     ncol = bi[i+1]-bi[i];
2009     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2010     row++;
2011     array += ncol; cols_tmp += ncol;
2012   }
2013   ierr = PetscFree(cols);CHKERRQ(ierr);
2014 
2015   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2016   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2017   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2018     *matout = B;
2019   } else {
2020     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2027 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2028 {
2029   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2030   Mat            a    = aij->A,b = aij->B;
2031   PetscErrorCode ierr;
2032   PetscInt       s1,s2,s3;
2033 
2034   PetscFunctionBegin;
2035   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2036   if (rr) {
2037     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2038     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2039     /* Overlap communication with computation. */
2040     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2041   }
2042   if (ll) {
2043     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2044     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2045     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2046   }
2047   /* scale the diagonal block */
2048   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2049 
2050   if (rr) {
2051     /* Do a scatter end and then right scale the off-diagonal block */
2052     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2053     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2060 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2061 {
2062   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2063   PetscErrorCode ierr;
2064 
2065   PetscFunctionBegin;
2066   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatEqual_MPIAIJ"
2072 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2073 {
2074   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2075   Mat            a,b,c,d;
2076   PetscBool      flg;
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   a = matA->A; b = matA->B;
2081   c = matB->A; d = matB->B;
2082 
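  /* the matrices are equal only if both the diagonal and the off-diagonal blocks
     match on every process, hence the logical AND reduction below */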
2083   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2084   if (flg) {
2085     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2086   }
2087   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatCopy_MPIAIJ"
2093 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2094 {
2095   PetscErrorCode ierr;
2096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2097   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2098 
2099   PetscFunctionBegin;
2100   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2101   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2102     /* because of the column compression in the off-processor part of the matrix a->B,
2103        the number of columns in a->B and b->B may be different, hence we cannot call
2104        MatCopy() directly on the two parts. If need be, a copy more efficient than
2105        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2106        then copying the submatrices */
2107     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2108   } else {
2109     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2110     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2111   }
2112   PetscFunctionReturn(0);
2113 }
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatSetUp_MPIAIJ"
2117 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2118 {
2119   PetscErrorCode ierr;
2120 
2121   PetscFunctionBegin;
2122   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 /*
2127    Computes the number of nonzeros per row needed for preallocation when X and Y
2128    have different nonzero structure.
2129 */
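/*
   For example, if row i of X has global columns {1,4} and row i of Y has global
   columns {2,4,7}, the merged pattern is {1,2,4,7}, so nnz[i] = 4.
*/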
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2132 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2133 {
2134   PetscInt       i,j,k,nzx,nzy;
2135 
2136   PetscFunctionBegin;
2137   /* Set the number of nonzeros in the new matrix */
2138   for (i=0; i<m; i++) {
2139     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2140     nzx = xi[i+1] - xi[i];
2141     nzy = yi[i+1] - yi[i];
2142     nnz[i] = 0;
2143     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2144       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2145       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2146       nnz[i]++;
2147     }
2148     for (; k<nzy; k++) nnz[i]++;
2149   }
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2156 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2157 {
2158   PetscErrorCode ierr;
2159   PetscInt       m = Y->rmap->N;
2160   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2161   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2162 
2163   PetscFunctionBegin;
2164   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatAXPY_MPIAIJ"
2170 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2171 {
2172   PetscErrorCode ierr;
2173   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2174   PetscBLASInt   bnz,one=1;
2175   Mat_SeqAIJ     *x,*y;
2176 
2177   PetscFunctionBegin;
2178   if (str == SAME_NONZERO_PATTERN) {
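    /* identical nonzero patterns: the value arrays of the corresponding diagonal and
       off-diagonal blocks line up entry for entry, so one BLAS axpy per block suffices */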
2179     PetscScalar alpha = a;
2180     x    = (Mat_SeqAIJ*)xx->A->data;
2181     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2182     y    = (Mat_SeqAIJ*)yy->A->data;
2183     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2184     x    = (Mat_SeqAIJ*)xx->B->data;
2185     y    = (Mat_SeqAIJ*)yy->B->data;
2186     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2187     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2188     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2189   } else if (str == SUBSET_NONZERO_PATTERN) {
2190     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2191   } else {
2192     Mat      B;
2193     PetscInt *nnz_d,*nnz_o;
2194     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2195     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2196     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2197     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2198     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2199     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2200     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2201     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2202     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2203     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2204     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2205     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2206     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2207     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2208   }
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2213 
2214 #undef __FUNCT__
2215 #define __FUNCT__ "MatConjugate_MPIAIJ"
2216 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2217 {
2218 #if defined(PETSC_USE_COMPLEX)
2219   PetscErrorCode ierr;
2220   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2221 
2222   PetscFunctionBegin;
2223   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2224   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2225 #else
2226   PetscFunctionBegin;
2227 #endif
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatRealPart_MPIAIJ"
2233 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2234 {
2235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2236   PetscErrorCode ierr;
2237 
2238   PetscFunctionBegin;
2239   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2240   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2241   PetscFunctionReturn(0);
2242 }
2243 
2244 #undef __FUNCT__
2245 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2246 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2247 {
2248   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2249   PetscErrorCode ierr;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2253   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #if defined(PETSC_HAVE_PBGL)
2258 
2259 #include <boost/parallel/mpi/bsp_process_group.hpp>
2260 #include <boost/graph/distributed/ilu_default_graph.hpp>
2261 #include <boost/graph/distributed/ilu_0_block.hpp>
2262 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2263 #include <boost/graph/distributed/petsc/interface.hpp>
2264 #include <boost/multi_array.hpp>
2265 #include <boost/parallel/distributed_property_map.hpp>
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2269 /*
2270   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2271 */
2272 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2273 {
2274   namespace petsc = boost::distributed::petsc;
2275 
2276   namespace graph_dist = boost::graph::distributed;
2277   using boost::graph::distributed::ilu_default::process_group_type;
2278   using boost::graph::ilu_permuted;
2279 
2280   PetscBool      row_identity, col_identity;
2281   PetscContainer c;
2282   PetscInt       m, n, M, N;
2283   PetscErrorCode ierr;
2284 
2285   PetscFunctionBegin;
2286   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2287   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2288   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2289   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2290 
2291   process_group_type pg;
2292   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2293   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2294   lgraph_type& level_graph = *lgraph_p;
2295   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2296 
2297   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2298   ilu_permuted(level_graph);
2299 
2300   /* put together the new matrix */
2301   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2302   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2303   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2304   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2305   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2306   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2307   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2308   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309 
2310   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2311   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2312   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2313   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2319 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2320 {
2321   PetscFunctionBegin;
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSolve_MPIAIJ"
2327 /*
2328   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2329 */
2330 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2331 {
2332   namespace graph_dist = boost::graph::distributed;
2333 
2334   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2335   lgraph_type    *lgraph_p;
2336   PetscContainer c;
2337   PetscErrorCode ierr;
2338 
2339   PetscFunctionBegin;
2340   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2341   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2342   ierr = VecCopy(b, x);CHKERRQ(ierr);
2343 
2344   PetscScalar *array_x;
2345   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2346   PetscInt sx;
2347   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2348 
2349   PetscScalar *array_b;
2350   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2351   PetscInt sb;
2352   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2353 
2354   lgraph_type& level_graph = *lgraph_p;
2355   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2356 
2357   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2358   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2359   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2360 
2361   typedef boost::iterator_property_map<array_ref_type::iterator,
2362                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2363   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2364   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2365 
2366   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2367   PetscFunctionReturn(0);
2368 }
2369 #endif
2370 
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2374 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2375 {
2376   PetscMPIInt    rank,size;
2377   MPI_Comm       comm;
2378   PetscErrorCode ierr;
2379   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2380   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2381   PetscInt       *rowrange = mat->rmap->range;
2382   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2383   Mat            A = aij->A,B=aij->B,C=*matredundant;
2384   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2385   PetscScalar    *sbuf_a;
2386   PetscInt       nzlocal=a->nz+b->nz;
2387   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2388   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2389   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2390   MatScalar      *aworkA,*aworkB;
2391   PetscScalar    *vals;
2392   PetscMPIInt    tag1,tag2,tag3,imdex;
2393   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2394   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2395   MPI_Status     recv_status,*send_status;
2396   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2397   PetscInt       **rbuf_j=NULL;
2398   PetscScalar    **rbuf_a=NULL;
2399   Mat_Redundant  *redund =NULL;
2400 
2401   PetscFunctionBegin;
2402   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2403   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2404   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2405   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2406   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2407 
2408   if (reuse == MAT_REUSE_MATRIX) {
2409     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2410     if (subsize == 1) {
2411       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2412       redund = c->redundant;
2413     } else {
2414       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2415       redund = c->redundant;
2416     }
2417     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2418 
2419     nsends    = redund->nsends;
2420     nrecvs    = redund->nrecvs;
2421     send_rank = redund->send_rank;
2422     recv_rank = redund->recv_rank;
2423     sbuf_nz   = redund->sbuf_nz;
2424     rbuf_nz   = redund->rbuf_nz;
2425     sbuf_j    = redund->sbuf_j;
2426     sbuf_a    = redund->sbuf_a;
2427     rbuf_j    = redund->rbuf_j;
2428     rbuf_a    = redund->rbuf_a;
2429   }
2430 
2431   if (reuse == MAT_INITIAL_MATRIX) {
2432     PetscInt    nleftover,np_subcomm;
2433 
2434     /* determine the destination ranks send_rank and the message counts nsends and nrecvs */
2435     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2436 
2437     np_subcomm = size/nsubcomm;
2438     nleftover  = size - nsubcomm*np_subcomm;
2439 
2440     /* the block of code below is specific to the INTERLACED layout */
2441     /* ------------------------------------------------*/
2442     nsends = 0; nrecvs = 0;
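    /* with the interlaced layout rank i has subrank i/nsubcomm, so every process
       exchanges rows with the processes holding the same subrank in the other
       subcommunicators */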
2443     for (i=0; i<size; i++) {
2444       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2445         send_rank[nsends++] = i;
2446         recv_rank[nrecvs++] = i;
2447       }
2448     }
2449     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2450       i = size-nleftover-1;
2451       j = 0;
2452       while (j < nsubcomm - nleftover) {
2453         send_rank[nsends++] = i;
2454         i--; j++;
2455       }
2456     }
2457 
2458     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2459       for (i=0; i<nleftover; i++) {
2460         recv_rank[nrecvs++] = size-nleftover+i;
2461       }
2462     }
2463     /*----------------------------------------------*/
2464 
2465     /* allocate sbuf_j, sbuf_a */
2466     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2467     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2468     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2469     /*
2470     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2471     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2472      */
2473   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2474 
2475   /* copy mat's local entries into the buffers */
2476   if (reuse == MAT_INITIAL_MATRIX) {
2477     rownz_max = 0;
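    /* sbuf_j holds the row pointers followed by the column indices (CSR layout);
       sbuf_a holds the values in the matching order */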
2478     rptr      = sbuf_j;
2479     cols      = sbuf_j + rend-rstart + 1;
2480     vals      = sbuf_a;
2481     rptr[0]   = 0;
2482     for (i=0; i<rend-rstart; i++) {
2483       row    = i + rstart;
2484       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2485       ncols  = nzA + nzB;
2486       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2487       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2488       /* load the column indices for this row into cols */
2489       lwrite = 0;
2490       for (l=0; l<nzB; l++) {
2491         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2492           vals[lwrite]   = aworkB[l];
2493           cols[lwrite++] = ctmp;
2494         }
2495       }
2496       for (l=0; l<nzA; l++) {
2497         vals[lwrite]   = aworkA[l];
2498         cols[lwrite++] = cstart + cworkA[l];
2499       }
2500       for (l=0; l<nzB; l++) {
2501         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2502           vals[lwrite]   = aworkB[l];
2503           cols[lwrite++] = ctmp;
2504         }
2505       }
2506       vals     += ncols;
2507       cols     += ncols;
2508       rptr[i+1] = rptr[i] + ncols;
2509       if (rownz_max < ncols) rownz_max = ncols;
2510     }
2511     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"rptr[%D] %D != %D + %D",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2512   } else { /* only copy matrix values into sbuf_a */
2513     rptr    = sbuf_j;
2514     vals    = sbuf_a;
2515     rptr[0] = 0;
2516     for (i=0; i<rend-rstart; i++) {
2517       row    = i + rstart;
2518       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2519       ncols  = nzA + nzB;
2520       cworkB = b->j + b->i[i];
2521       aworkA = a->a + a->i[i];
2522       aworkB = b->a + b->i[i];
2523       lwrite = 0;
2524       for (l=0; l<nzB; l++) {
2525         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2526       }
2527       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2528       for (l=0; l<nzB; l++) {
2529         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2530       }
2531       vals     += ncols;
2532       rptr[i+1] = rptr[i] + ncols;
2533     }
2534   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2535 
2536   /* send nzlocal to others, and recv other's nzlocal */
2537   /*--------------------------------------------------*/
2538   if (reuse == MAT_INITIAL_MATRIX) {
2539     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2540 
2541     s_waits2 = s_waits3 + nsends;
2542     s_waits1 = s_waits2 + nsends;
2543     r_waits1 = s_waits1 + nsends;
2544     r_waits2 = r_waits1 + nrecvs;
2545     r_waits3 = r_waits2 + nrecvs;
2546   } else {
2547     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2548 
2549     r_waits3 = s_waits3 + nsends;
2550   }
2551 
2552   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2553   if (reuse == MAT_INITIAL_MATRIX) {
2554     /* get new tags to keep the communication clean */
2555     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2556     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2557     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2558 
2559     /* post receives of other's nzlocal */
2560     for (i=0; i<nrecvs; i++) {
2561       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2562     }
2563     /* send nzlocal to others */
2564     for (i=0; i<nsends; i++) {
2565       sbuf_nz[i] = nzlocal;
2566       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2567     }
2568     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2569     count = nrecvs;
2570     while (count) {
2571       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2572 
2573       recv_rank[imdex] = recv_status.MPI_SOURCE;
2574       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2575       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2576 
2577       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2578 
2579       rbuf_nz[imdex] += i + 2;
2580 
2581       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2582       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2583       count--;
2584     }
2585     /* wait on sends of nzlocal */
2586     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2587     /* send mat->i,j to others, and recv from other's */
2588     /*------------------------------------------------*/
2589     for (i=0; i<nsends; i++) {
2590       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2591       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2592     }
2593     /* wait on receives of mat->i,j */
2594     /*------------------------------*/
2595     count = nrecvs;
2596     while (count) {
2597       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2598       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2599       count--;
2600     }
2601     /* wait on sends of mat->i,j */
2602     /*---------------------------*/
2603     if (nsends) {
2604       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2605     }
2606   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2607 
2608   /* post receives, send and receive mat->a */
2609   /*----------------------------------------*/
2610   for (imdex=0; imdex<nrecvs; imdex++) {
2611     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2612   }
2613   for (i=0; i<nsends; i++) {
2614     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2615   }
2616   count = nrecvs;
2617   while (count) {
2618     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2619     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2620     count--;
2621   }
2622   if (nsends) {
2623     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2624   }
2625 
2626   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2627 
2628   /* create redundant matrix */
2629   /*-------------------------*/
2630   if (reuse == MAT_INITIAL_MATRIX) {
2631     const PetscInt *range;
2632     PetscInt       rstart_sub,rend_sub,mloc_sub;
2633 
2634     /* compute rownz_max for preallocation */
2635     for (imdex=0; imdex<nrecvs; imdex++) {
2636       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2637       rptr = rbuf_j[imdex];
2638       for (i=0; i<j; i++) {
2639         ncols = rptr[i+1] - rptr[i];
2640         if (rownz_max < ncols) rownz_max = ncols;
2641       }
2642     }
2643 
2644     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2645 
2646     /* get local size of redundant matrix
2647        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2648     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2649     rstart_sub = range[nsubcomm*subrank];
2650     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2651       rend_sub = range[nsubcomm*(subrank+1)];
2652     } else {
2653       rend_sub = mat->rmap->N;
2654     }
2655     mloc_sub = rend_sub - rstart_sub;
2656 
2657     if (M == N) {
2658       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2659     } else { /* non-square matrix */
2660       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2661     }
2662     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2663     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2664     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2665     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2666   } else {
2667     C = *matredundant;
2668   }
2669 
2670   /* insert local matrix entries */
2671   rptr = sbuf_j;
2672   cols = sbuf_j + rend-rstart + 1;
2673   vals = sbuf_a;
2674   for (i=0; i<rend-rstart; i++) {
2675     row   = i + rstart;
2676     ncols = rptr[i+1] - rptr[i];
2677     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2678     vals += ncols;
2679     cols += ncols;
2680   }
2681   /* insert received matrix entries */
2682   for (imdex=0; imdex<nrecvs; imdex++) {
2683     rstart = rowrange[recv_rank[imdex]];
2684     rend   = rowrange[recv_rank[imdex]+1];
2685     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2686     rptr   = rbuf_j[imdex];
2687     cols   = rbuf_j[imdex] + rend-rstart + 1;
2688     vals   = rbuf_a[imdex];
2689     for (i=0; i<rend-rstart; i++) {
2690       row   = i + rstart;
2691       ncols = rptr[i+1] - rptr[i];
2692       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2693       vals += ncols;
2694       cols += ncols;
2695     }
2696   }
2697   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2698   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2699 
2700   if (reuse == MAT_INITIAL_MATRIX) {
2701     *matredundant = C;
2702 
2703     /* create a supporting struct and attach it to C for reuse */
2704     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2705     if (subsize == 1) {
2706       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2707       c->redundant = redund;
2708     } else {
2709       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2710       c->redundant = redund;
2711     }
2712 
2713     redund->nzlocal   = nzlocal;
2714     redund->nsends    = nsends;
2715     redund->nrecvs    = nrecvs;
2716     redund->send_rank = send_rank;
2717     redund->recv_rank = recv_rank;
2718     redund->sbuf_nz   = sbuf_nz;
2719     redund->rbuf_nz   = rbuf_nz;
2720     redund->sbuf_j    = sbuf_j;
2721     redund->sbuf_a    = sbuf_a;
2722     redund->rbuf_j    = rbuf_j;
2723     redund->rbuf_a    = rbuf_a;
2724     redund->psubcomm  = NULL;
2725   }
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2731 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2732 {
2733   PetscErrorCode ierr;
2734   MPI_Comm       comm;
2735   PetscMPIInt    size,subsize;
2736   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2737   Mat_Redundant  *redund=NULL;
2738   PetscSubcomm   psubcomm=NULL;
2739   MPI_Comm       subcomm_in=subcomm;
2740   Mat            *matseq;
2741   IS             isrow,iscol;
2742 
2743   PetscFunctionBegin;
2744   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2745     if (reuse ==  MAT_INITIAL_MATRIX) {
2746       /* create psubcomm, then get subcomm */
2747       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2748       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2749       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2750 
2751       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2752       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2753       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2754       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2755       subcomm = psubcomm->comm;
2756     } else { /* retrieve psubcomm and subcomm */
2757       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2758       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2759       if (subsize == 1) {
2760         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2761         redund = c->redundant;
2762       } else {
2763         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2764         redund = c->redundant;
2765       }
2766       psubcomm = redund->psubcomm;
2767     }
2768     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2769       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2770       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2771         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2772         if (subsize == 1) {
2773           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2774           c->redundant->psubcomm = psubcomm;
2775         } else {
2776           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2777           c->redundant->psubcomm = psubcomm;
2778         }
2779       }
2780       PetscFunctionReturn(0);
2781     }
2782   }
2783 
2784   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2785   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2786   if (reuse == MAT_INITIAL_MATRIX) {
2787     /* create a local sequential matrix matseq[0] */
2788     mloc_sub = PETSC_DECIDE;
2789     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2790     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2791     rstart = rend - mloc_sub;
2792     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2793     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2794   } else { /* reuse == MAT_REUSE_MATRIX */
2795     if (subsize == 1) {
2796       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2797       redund = c->redundant;
2798     } else {
2799       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2800       redund = c->redundant;
2801     }
2802 
2803     isrow  = redund->isrow;
2804     iscol  = redund->iscol;
2805     matseq = redund->matseq;
2806   }
2807   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2808   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2809 
2810   if (reuse == MAT_INITIAL_MATRIX) {
2811     /* create a supporting struct and attach it to C for reuse */
2812     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2813     if (subsize == 1) {
2814       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2815       c->redundant = redund;
2816     } else {
2817       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2818       c->redundant = redund;
2819     }
2820     redund->isrow    = isrow;
2821     redund->iscol    = iscol;
2822     redund->matseq   = matseq;
2823     redund->psubcomm = psubcomm;
2824   }
2825   PetscFunctionReturn(0);
2826 }
2827 
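/*
   A minimal calling sketch for the dispatching entry point MatGetRedundantMatrix()
   (a hedged sketch, not taken from this file; passing MPI_COMM_NULL lets the
   routine build the subcommunicators itself):

     Mat redmat;
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&redmat);CHKERRQ(ierr);
     ...
     ierr = MatGetRedundantMatrix(A,nsubcomm,MPI_COMM_NULL,MAT_REUSE_MATRIX,&redmat);CHKERRQ(ierr);
     ierr = MatDestroy(&redmat);CHKERRQ(ierr);
*/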
2828 #undef __FUNCT__
2829 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2830 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2831 {
2832   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2833   PetscErrorCode ierr;
2834   PetscInt       i,*idxb = 0;
2835   PetscScalar    *va,*vb;
2836   Vec            vtmp;
2837 
2838   PetscFunctionBegin;
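  /* Take the row maxima of the "diagonal" block A first; its column indices are
     local, so they are shifted by cmap->rstart below.  Then take the row maxima
     of the off-diagonal block B and map its compressed column indices back to
     global indices through a->garray before merging the two results. */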
2839   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2840   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2841   if (idx) {
2842     for (i=0; i<A->rmap->n; i++) {
2843       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2844     }
2845   }
2846 
2847   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2848   if (idx) {
2849     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2850   }
2851   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2852   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2853 
2854   for (i=0; i<A->rmap->n; i++) {
2855     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2856       va[i] = vb[i];
2857       if (idx) idx[i] = a->garray[idxb[i]];
2858     }
2859   }
2860 
2861   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2862   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2863   ierr = PetscFree(idxb);CHKERRQ(ierr);
2864   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2873   PetscErrorCode ierr;
2874   PetscInt       i,*idxb = 0;
2875   PetscScalar    *va,*vb;
2876   Vec            vtmp;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2880   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881   if (idx) {
2882     for (i=0; i<A->rmap->n; i++) {
2883       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884     }
2885   }
2886 
2887   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888   if (idx) {
2889     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890   }
2891   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893 
2894   for (i=0; i<A->rmap->n; i++) {
2895     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2896       va[i] = vb[i];
2897       if (idx) idx[i] = a->garray[idxb[i]];
2898     }
2899   }
2900 
2901   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903   ierr = PetscFree(idxb);CHKERRQ(ierr);
2904   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
2907 
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2910 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2913   PetscInt       n      = A->rmap->n;
2914   PetscInt       cstart = A->cmap->rstart;
2915   PetscInt       *cmap  = mat->garray;
2916   PetscInt       *diagIdx, *offdiagIdx;
2917   Vec            diagV, offdiagV;
2918   PetscScalar    *a, *diagA, *offdiagA;
2919   PetscInt       r;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2924   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2925   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2926   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2927   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2928   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2929   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2930   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2931   for (r = 0; r < n; ++r) {
2932     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2933       a[r]   = diagA[r];
2934       idx[r] = cstart + diagIdx[r];
2935     } else {
2936       a[r]   = offdiagA[r];
2937       idx[r] = cmap[offdiagIdx[r]];
2938     }
2939   }
2940   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2941   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2943   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2944   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2945   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2951 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2952 {
2953   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2954   PetscInt       n      = A->rmap->n;
2955   PetscInt       cstart = A->cmap->rstart;
2956   PetscInt       *cmap  = mat->garray;
2957   PetscInt       *diagIdx, *offdiagIdx;
2958   Vec            diagV, offdiagV;
2959   PetscScalar    *a, *diagA, *offdiagA;
2960   PetscInt       r;
2961   PetscErrorCode ierr;
2962 
2963   PetscFunctionBegin;
2964   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2965   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2966   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2967   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2968   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2969   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2970   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2971   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2972   for (r = 0; r < n; ++r) {
2973     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2974       a[r]   = diagA[r];
2975       idx[r] = cstart + diagIdx[r];
2976     } else {
2977       a[r]   = offdiagA[r];
2978       idx[r] = cmap[offdiagIdx[r]];
2979     }
2980   }
2981   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2983   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2984   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2985   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2986   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
2992 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2993 {
2994   PetscErrorCode ierr;
2995   Mat            *dummy;
2996 
2997   PetscFunctionBegin;
2998   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2999   *newmat = *dummy;
3000   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3001   PetscFunctionReturn(0);
3002 }
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3006 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3007 {
3008   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3009   PetscErrorCode ierr;
3010 
3011   PetscFunctionBegin;
3012   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 #undef __FUNCT__
3017 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3018 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3019 {
3020   PetscErrorCode ierr;
3021   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3022 
3023   PetscFunctionBegin;
3024   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3025   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3026   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3027   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3028   PetscFunctionReturn(0);
3029 }
3030 
3031 /* -------------------------------------------------------------------*/
3032 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3033                                        MatGetRow_MPIAIJ,
3034                                        MatRestoreRow_MPIAIJ,
3035                                        MatMult_MPIAIJ,
3036                                 /* 4*/ MatMultAdd_MPIAIJ,
3037                                        MatMultTranspose_MPIAIJ,
3038                                        MatMultTransposeAdd_MPIAIJ,
3039 #if defined(PETSC_HAVE_PBGL)
3040                                        MatSolve_MPIAIJ,
3041 #else
3042                                        0,
3043 #endif
3044                                        0,
3045                                        0,
3046                                 /*10*/ 0,
3047                                        0,
3048                                        0,
3049                                        MatSOR_MPIAIJ,
3050                                        MatTranspose_MPIAIJ,
3051                                 /*15*/ MatGetInfo_MPIAIJ,
3052                                        MatEqual_MPIAIJ,
3053                                        MatGetDiagonal_MPIAIJ,
3054                                        MatDiagonalScale_MPIAIJ,
3055                                        MatNorm_MPIAIJ,
3056                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3057                                        MatAssemblyEnd_MPIAIJ,
3058                                        MatSetOption_MPIAIJ,
3059                                        MatZeroEntries_MPIAIJ,
3060                                 /*24*/ MatZeroRows_MPIAIJ,
3061                                        0,
3062 #if defined(PETSC_HAVE_PBGL)
3063                                        0,
3064 #else
3065                                        0,
3066 #endif
3067                                        0,
3068                                        0,
3069                                 /*29*/ MatSetUp_MPIAIJ,
3070 #if defined(PETSC_HAVE_PBGL)
3071                                        0,
3072 #else
3073                                        0,
3074 #endif
3075                                        0,
3076                                        0,
3077                                        0,
3078                                 /*34*/ MatDuplicate_MPIAIJ,
3079                                        0,
3080                                        0,
3081                                        0,
3082                                        0,
3083                                 /*39*/ MatAXPY_MPIAIJ,
3084                                        MatGetSubMatrices_MPIAIJ,
3085                                        MatIncreaseOverlap_MPIAIJ,
3086                                        MatGetValues_MPIAIJ,
3087                                        MatCopy_MPIAIJ,
3088                                 /*44*/ MatGetRowMax_MPIAIJ,
3089                                        MatScale_MPIAIJ,
3090                                        0,
3091                                        0,
3092                                        MatZeroRowsColumns_MPIAIJ,
3093                                 /*49*/ MatSetRandom_MPIAIJ,
3094                                        0,
3095                                        0,
3096                                        0,
3097                                        0,
3098                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3099                                        0,
3100                                        MatSetUnfactored_MPIAIJ,
3101                                        MatPermute_MPIAIJ,
3102                                        0,
3103                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3104                                        MatDestroy_MPIAIJ,
3105                                        MatView_MPIAIJ,
3106                                        0,
3107                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3108                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3109                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3110                                        0,
3111                                        0,
3112                                        0,
3113                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3114                                        MatGetRowMinAbs_MPIAIJ,
3115                                        0,
3116                                        MatSetColoring_MPIAIJ,
3117                                        0,
3118                                        MatSetValuesAdifor_MPIAIJ,
3119                                 /*75*/ MatFDColoringApply_AIJ,
3120                                        0,
3121                                        0,
3122                                        0,
3123                                        MatFindZeroDiagonals_MPIAIJ,
3124                                 /*80*/ 0,
3125                                        0,
3126                                        0,
3127                                 /*83*/ MatLoad_MPIAIJ,
3128                                        0,
3129                                        0,
3130                                        0,
3131                                        0,
3132                                        0,
3133                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3134                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3135                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3136                                        MatPtAP_MPIAIJ_MPIAIJ,
3137                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3138                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                        0,
3143                                 /*99*/ 0,
3144                                        0,
3145                                        0,
3146                                        MatConjugate_MPIAIJ,
3147                                        0,
3148                                 /*104*/MatSetValuesRow_MPIAIJ,
3149                                        MatRealPart_MPIAIJ,
3150                                        MatImaginaryPart_MPIAIJ,
3151                                        0,
3152                                        0,
3153                                 /*109*/0,
3154                                        MatGetRedundantMatrix_MPIAIJ,
3155                                        MatGetRowMin_MPIAIJ,
3156                                        0,
3157                                        0,
3158                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3159                                        0,
3160                                        0,
3161                                        0,
3162                                        0,
3163                                 /*119*/0,
3164                                        0,
3165                                        0,
3166                                        0,
3167                                        MatGetMultiProcBlock_MPIAIJ,
3168                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3169                                        MatGetColumnNorms_MPIAIJ,
3170                                        MatInvertBlockDiagonal_MPIAIJ,
3171                                        0,
3172                                        MatGetSubMatricesParallel_MPIAIJ,
3173                                 /*129*/0,
3174                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3175                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3176                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3177                                        0,
3178                                 /*134*/0,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                        0,
3183                                 /*139*/0,
3184                                        0,
3185                                        0,
3186                                        MatFDColoringSetUp_MPIXAIJ
3187 };
3188 
3189 /* ----------------------------------------------------------------------------------------*/
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3193 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3194 {
3195   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3196   PetscErrorCode ierr;
3197 
3198   PetscFunctionBegin;
3199   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3200   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 #undef __FUNCT__
3205 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3206 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3207 {
3208   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3209   PetscErrorCode ierr;
3210 
3211   PetscFunctionBegin;
3212   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3213   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 #undef __FUNCT__
3218 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3219 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3220 {
3221   Mat_MPIAIJ     *b;
3222   PetscErrorCode ierr;
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3226   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3227   b = (Mat_MPIAIJ*)B->data;
3228 
3229   if (!B->preallocated) {
3230     /* Explicitly create 2 MATSEQAIJ matrices. */
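    /* b->A holds the square "diagonal" block (locally owned rows and columns);
       b->B holds the off-diagonal block.  b->B is created here with the full
       global column width and is compacted to only the referenced columns
       (tracked in b->garray) when the matrix is assembled. */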
3231     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3232     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3233     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3234     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3235     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3236     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3237     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3238     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3239     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3240     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3241   }
3242 
3243   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3244   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3245   B->preallocated = PETSC_TRUE;
3246   PetscFunctionReturn(0);
3247 }
3248 
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3251 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3252 {
3253   Mat            mat;
3254   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   *newmat = 0;
3259   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3260   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3261   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3262   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3263   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3264   a       = (Mat_MPIAIJ*)mat->data;
3265 
3266   mat->factortype   = matin->factortype;
3267   mat->assembled    = PETSC_TRUE;
3268   mat->insertmode   = NOT_SET_VALUES;
3269   mat->preallocated = PETSC_TRUE;
3270 
3271   a->size         = oldmat->size;
3272   a->rank         = oldmat->rank;
3273   a->donotstash   = oldmat->donotstash;
3274   a->roworiented  = oldmat->roworiented;
3275   a->rowindices   = 0;
3276   a->rowvalues    = 0;
3277   a->getrowactive = PETSC_FALSE;
3278 
3279   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3280   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3281 
3282   if (oldmat->colmap) {
3283 #if defined(PETSC_USE_CTABLE)
3284     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3285 #else
3286     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3287     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3288     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3289 #endif
3290   } else a->colmap = 0;
3291   if (oldmat->garray) {
3292     PetscInt len;
3293     len  = oldmat->B->cmap->n;
3294     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3295     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3296     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3297   } else a->garray = 0;
3298 
3299   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3300   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3301   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3302   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3303   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3304   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3305   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3306   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3307   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3308   *newmat = mat;
3309   PetscFunctionReturn(0);
3310 }
3311 
3312 
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatLoad_MPIAIJ"
3316 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3317 {
3318   PetscScalar    *vals,*svals;
3319   MPI_Comm       comm;
3320   PetscErrorCode ierr;
3321   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3322   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3323   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3324   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3325   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3326   int            fd;
3327   PetscInt       bs = newMat->rmap->bs;
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3331   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3332   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3333   if (!rank) {
3334     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3335     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3336     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3337   }
3338 
3339   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3340   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3341   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3342   if (bs < 0) bs = 1;
3343 
3344   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3345 
3346   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3347   M    = header[1]; N = header[2];
3348   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3349   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3350   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3351 
3352   /* If global sizes are set, check if they are consistent with that given in the file */
3353   if (sizesset) {
3354     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3355   }
3356   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3357   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3358 
3359   /* determine ownership of all (block) rows */
3360   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs);
3361   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3362   else m = newMat->rmap->n; /* Set by user */
3363 
3364   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3365   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3366 
3367   /* First process needs enough room for process with most rows */
3368   if (!rank) {
3369     mmax = rowners[1];
3370     for (i=2; i<=size; i++) {
3371       mmax = PetscMax(mmax, rowners[i]);
3372     }
3373   } else mmax = -1;             /* unused, but compilers complain */
3374 
3375   rowners[0] = 0;
3376   for (i=2; i<=size; i++) {
3377     rowners[i] += rowners[i-1];
3378   }
3379   rstart = rowners[rank];
3380   rend   = rowners[rank+1];
3381 
3382   /* distribute row lengths to all processors */
3383   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3384   if (!rank) {
3385     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3386     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3387     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3388     for (j=0; j<m; j++) {
3389       procsnz[0] += ourlens[j];
3390     }
3391     for (i=1; i<size; i++) {
3392       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3393       /* calculate the number of nonzeros on each processor */
3394       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3395         procsnz[i] += rowlengths[j];
3396       }
3397       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3398     }
3399     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3400   } else {
3401     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3402   }
3403 
3404   if (!rank) {
3405     /* determine max buffer needed and allocate it */
3406     maxnz = 0;
3407     for (i=0; i<size; i++) {
3408       maxnz = PetscMax(maxnz,procsnz[i]);
3409     }
3410     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3411 
3412     /* read in my part of the matrix column indices  */
3413     nz   = procsnz[0];
3414     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3415     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3416 
3417     /* read in everyone else's and ship off */
3418     for (i=1; i<size; i++) {
3419       nz   = procsnz[i];
3420       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3421       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3422     }
3423     ierr = PetscFree(cols);CHKERRQ(ierr);
3424   } else {
3425     /* determine buffer space needed for message */
3426     nz = 0;
3427     for (i=0; i<m; i++) {
3428       nz += ourlens[i];
3429     }
3430     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3431 
3432     /* receive message of column indices*/
3433     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3434   }
3435 
3436   /* determine column ownership if matrix is not square */
3437   if (N != M) {
3438     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3439     else n = newMat->cmap->n;
3440     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3441     cstart = cend - n;
3442   } else {
3443     cstart = rstart;
3444     cend   = rend;
3445     n      = cend - cstart;
3446   }
3447 
3448   /* loop over local rows, determining number of off diagonal entries */
3449   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3450   jj   = 0;
3451   for (i=0; i<m; i++) {
3452     for (j=0; j<ourlens[i]; j++) {
3453       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3454       jj++;
3455     }
3456   }
3457 
3458   for (i=0; i<m; i++) {
3459     ourlens[i] -= offlens[i];
3460   }
3461   if (!sizesset) {
3462     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3463   }
3464 
3465   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3466 
3467   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3468 
3469   for (i=0; i<m; i++) {
3470     ourlens[i] += offlens[i];
3471   }
3472 
3473   if (!rank) {
3474     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3475 
3476     /* read in my part of the matrix numerical values  */
3477     nz   = procsnz[0];
3478     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3479 
3480     /* insert into matrix */
3481     jj      = rstart;
3482     smycols = mycols;
3483     svals   = vals;
3484     for (i=0; i<m; i++) {
3485       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3486       smycols += ourlens[i];
3487       svals   += ourlens[i];
3488       jj++;
3489     }
3490 
3491     /* read in other processors and ship out */
3492     for (i=1; i<size; i++) {
3493       nz   = procsnz[i];
3494       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3495       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3496     }
3497     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3498   } else {
3499     /* receive numeric values */
3500     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3501 
3502     /* receive message of values*/
3503     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3504 
3505     /* insert into matrix */
3506     jj      = rstart;
3507     smycols = mycols;
3508     svals   = vals;
3509     for (i=0; i<m; i++) {
3510       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3511       smycols += ourlens[i];
3512       svals   += ourlens[i];
3513       jj++;
3514     }
3515   }
3516   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3517   ierr = PetscFree(vals);CHKERRQ(ierr);
3518   ierr = PetscFree(mycols);CHKERRQ(ierr);
3519   ierr = PetscFree(rowners);CHKERRQ(ierr);
3520   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3521   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3522   PetscFunctionReturn(0);
3523 }
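/*
   A minimal usage sketch for loading an MPIAIJ matrix (a hedged sketch assuming
   "matrix.dat" is a PETSc binary file previously written with MatView() on a
   binary viewer):

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/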
3524 
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3527 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3528 {
3529   PetscErrorCode ierr;
3530   IS             iscol_local;
3531   PetscInt       csize;
3532 
3533   PetscFunctionBegin;
3534   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3535   if (call == MAT_REUSE_MATRIX) {
3536     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3537     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3538   } else {
3539     PetscInt cbs;
3540     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3541     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3542     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3543   }
3544   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3545   if (call == MAT_INITIAL_MATRIX) {
3546     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3547     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3548   }
3549   PetscFunctionReturn(0);
3550 }
3551 
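/*
   A minimal usage sketch for the public entry point (hypothetical index sets;
   this "identity" extraction simply copies the locally owned rows and columns):

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend,cstart,cend;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
*/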
3552 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3555 /*
3556     Not great since it makes two copies of the submatrix, first a SeqAIJ
3557   locally and then, by concatenating the local matrices, the end result.
3558   Writing it directly would be much like MatGetSubMatrices_MPIAIJ().
3559 
3560   Note: This requires a sequential iscol with all indices.
3561 */
3562 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3563 {
3564   PetscErrorCode ierr;
3565   PetscMPIInt    rank,size;
3566   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3567   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3568   PetscBool      allcolumns, colflag;
3569   Mat            M,Mreuse;
3570   MatScalar      *vwork,*aa;
3571   MPI_Comm       comm;
3572   Mat_SeqAIJ     *aij;
3573 
3574   PetscFunctionBegin;
3575   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3576   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3577   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3578 
3579   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3580   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3581   if (colflag && ncol == mat->cmap->N) {
3582     allcolumns = PETSC_TRUE;
3583   } else {
3584     allcolumns = PETSC_FALSE;
3585   }
3586   if (call ==  MAT_REUSE_MATRIX) {
3587     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3588     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3589     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3590   } else {
3591     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3592   }
3593 
3594   /*
3595       m - number of local rows
3596       n - number of columns (same on all processors)
3597       rstart - first row in new global matrix generated
3598   */
3599   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3600   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3601   if (call == MAT_INITIAL_MATRIX) {
3602     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3603     ii  = aij->i;
3604     jj  = aij->j;
3605 
3606     /*
3607         Determine the number of non-zeros in the diagonal and off-diagonal
3608         portions of the matrix in order to do correct preallocation
3609     */
3610 
3611     /* first get start and end of "diagonal" columns */
3612     if (csize == PETSC_DECIDE) {
3613       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3614       if (mglobal == n) { /* square matrix */
3615         nlocal = m;
3616       } else {
3617         nlocal = n/size + ((n % size) > rank);
3618       }
3619     } else {
3620       nlocal = csize;
3621     }
3622     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3623     rstart = rend - nlocal;
3624     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3625 
3626     /* next, compute all the lengths */
3627     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3628     olens = dlens + m;
3629     for (i=0; i<m; i++) {
3630       jend = ii[i+1] - ii[i];
3631       olen = 0;
3632       dlen = 0;
3633       for (j=0; j<jend; j++) {
3634         if (*jj < rstart || *jj >= rend) olen++;
3635         else dlen++;
3636         jj++;
3637       }
3638       olens[i] = olen;
3639       dlens[i] = dlen;
3640     }
3641     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3642     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3643     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3644     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3645     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3646     ierr = PetscFree(dlens);CHKERRQ(ierr);
3647   } else {
3648     PetscInt ml,nl;
3649 
3650     M    = *newmat;
3651     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3652     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3653     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3654     /*
3655          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3656        rather than the slower MatSetValues().
3657     */
3658     M->was_assembled = PETSC_TRUE;
3659     M->assembled     = PETSC_FALSE;
3660   }
3661   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3662   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3663   ii   = aij->i;
3664   jj   = aij->j;
3665   aa   = aij->a;
3666   for (i=0; i<m; i++) {
3667     row   = rstart + i;
3668     nz    = ii[i+1] - ii[i];
3669     cwork = jj;     jj += nz;
3670     vwork = aa;     aa += nz;
3671     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3672   }
3673 
3674   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3675   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3676   *newmat = M;
3677 
3678   /* save submatrix used in processor for next request */
3679   if (call ==  MAT_INITIAL_MATRIX) {
3680     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3681     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3682   }
3683   PetscFunctionReturn(0);
3684 }
3685 
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3688 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3689 {
3690   PetscInt       m,cstart, cend,j,nnz,i,d;
3691   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3692   const PetscInt *JJ;
3693   PetscScalar    *values;
3694   PetscErrorCode ierr;
3695 
3696   PetscFunctionBegin;
3697   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3698 
3699   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3700   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3701   m      = B->rmap->n;
3702   cstart = B->cmap->rstart;
3703   cend   = B->cmap->rend;
3704   rstart = B->rmap->rstart;
3705 
3706   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3707 
3708 #if defined(PETSC_USE_DEBUG)
3709   for (i=0; i<m; i++) {
3710     nnz = Ii[i+1]- Ii[i];
3711     JJ  = J + Ii[i];
3712     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3713     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3714     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3715   }
3716 #endif
3717 
3718   for (i=0; i<m; i++) {
3719     nnz     = Ii[i+1]- Ii[i];
3720     JJ      = J + Ii[i];
3721     nnz_max = PetscMax(nnz_max,nnz);
3722     d       = 0;
3723     for (j=0; j<nnz; j++) {
3724       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3725     }
3726     d_nnz[i] = d;
3727     o_nnz[i] = nnz - d;
3728   }
3729   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3730   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3731 
3732   if (v) values = (PetscScalar*)v;
3733   else {
3734     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3735   }
3736 
3737   for (i=0; i<m; i++) {
3738     ii   = i + rstart;
3739     nnz  = Ii[i+1]- Ii[i];
3740     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3741   }
3742   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3743   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3744 
3745   if (!v) {
3746     ierr = PetscFree(values);CHKERRQ(ierr);
3747   }
3748   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3749   PetscFunctionReturn(0);
3750 }
3751 
3752 #undef __FUNCT__
3753 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3754 /*@
3755    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3756    (the default parallel PETSc format).
3757 
3758    Collective on MPI_Comm
3759 
3760    Input Parameters:
3761 +  B - the matrix
3762 .  i - the indices into j for the start of each local row (starts with zero)
3763 .  j - the column indices for each local row (starts with zero)
3764 -  v - optional values in the matrix
3765 
3766    Level: developer
3767 
3768    Notes:
3769        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3770      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3771      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3772 
3773        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3774 
3775        The format used for the sparse matrix input is equivalent to a
3776     row-major ordering, i.e., for the following matrix, the input data expected is
3777     as shown:
3778 
3779         1 0 0
3780         2 0 3     P0
3781        -------
3782         4 5 6     P1
3783 
3784      Process0 [P0]: rows_owned=[0,1]
3785         i =  {0,1,3}  [size = nrow+1  = 2+1]
3786         j =  {0,0,2}  [size = nz = 3]
3787         v =  {1,2,3}  [size = nz = 3]
3788 
3789      Process1 [P1]: rows_owned=[2]
3790         i =  {0,3}    [size = nrow+1  = 1+1]
3791         j =  {0,1,2}  [size = nz = 3]
3792         v =  {4,5,6}  [size = nz = 3]
3793 
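     A calling sketch for process 0 in the example above (hypothetical variable
     names; the other process passes its own i, j, v arrays):

.vb
    PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
    PetscScalar v[] = {1,2,3};
    MatCreate(PETSC_COMM_WORLD,&B);
    MatSetSizes(B,2,PETSC_DECIDE,3,3);   /* 2 local rows on P0 */
    MatSetType(B,MATMPIAIJ);
    MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
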
3794 .keywords: matrix, aij, compressed row, sparse, parallel
3795 
3796 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3797           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3798 @*/
3799 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3800 {
3801   PetscErrorCode ierr;
3802 
3803   PetscFunctionBegin;
3804   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3805   PetscFunctionReturn(0);
3806 }
3807 
3808 #undef __FUNCT__
3809 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3810 /*@C
3811    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3812    (the default parallel PETSc format).  For good matrix assembly performance
3813    the user should preallocate the matrix storage by setting the parameters
3814    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3815    performance can be increased by more than a factor of 50.
3816 
3817    Collective on MPI_Comm
3818 
3819    Input Parameters:
3820 +  B - the matrix
3821 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3822            (same value is used for all local rows)
3823 .  d_nnz - array containing the number of nonzeros in the various rows of the
3824            DIAGONAL portion of the local submatrix (possibly different for each row)
3825            or NULL, if d_nz is used to specify the nonzero structure.
3826            The size of this array is equal to the number of local rows, i.e 'm'.
3827            For matrices that will be factored, you must leave room for (and set)
3828            the diagonal entry even if it is zero.
3829 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3830            submatrix (same value is used for all local rows).
3831 -  o_nnz - array containing the number of nonzeros in the various rows of the
3832            OFF-DIAGONAL portion of the local submatrix (possibly different for
3833            each row) or NULL, if o_nz is used to specify the nonzero
3834            structure. The size of this array is equal to the number
3835            of local rows, i.e 'm'.
3836 
3837    If the *_nnz parameter is given then the *_nz parameter is ignored
3838 
3839    The AIJ format (also called the Yale sparse matrix format or
3840    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3841    storage.  The stored row and column indices begin with zero.
3842    See Users-Manual: ch_mat for details.
3843 
3844    The parallel matrix is partitioned such that the first m0 rows belong to
3845    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3846    to process 2, etc., where m0,m1,m2,... are the values of the input parameter 'm'.
3847 
3848    The DIAGONAL portion of the local submatrix of a processor can be defined
3849    as the submatrix which is obtained by extracting the part corresponding to
3850    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3851    first row that belongs to the processor, r2 is the last row belonging to
3852    this processor, and c1-c2 is the range of indices of the local part of a
3853    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3854    common case of a square matrix, the row and column ranges are the same and
3855    the DIAGONAL part is also square. The remaining portion of the local
3856    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3857 
3858    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3859 
3860    You can call MatGetInfo() to get information on how effective the preallocation was,
3861    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3862    You can also run with the option -info and look for messages with the string
3863    malloc in them to see if additional memory allocation was needed.
3864 
3865    Example usage:
3866 
3867    Consider the following 8x8 matrix with 34 non-zero values that is
3868    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3869    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3870    as follows:
3871 
3872 .vb
3873             1  2  0  |  0  3  0  |  0  4
3874     Proc0   0  5  6  |  7  0  0  |  8  0
3875             9  0 10  | 11  0  0  | 12  0
3876     -------------------------------------
3877            13  0 14  | 15 16 17  |  0  0
3878     Proc1   0 18  0  | 19 20 21  |  0  0
3879             0  0  0  | 22 23  0  | 24  0
3880     -------------------------------------
3881     Proc2  25 26 27  |  0  0 28  | 29  0
3882            30  0  0  | 31 32 33  |  0 34
3883 .ve
3884 
3885    This can be represented as a collection of submatrices as:
3886 
3887 .vb
3888       A B C
3889       D E F
3890       G H I
3891 .ve
3892 
3893    The submatrices A,B,C are owned by proc0, D,E,F are
3894    owned by proc1, and G,H,I are owned by proc2.
3895 
3896    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3897    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3898    The 'M','N' parameters are 8,8, and have the same values on all procs.
3899 
3900    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3901    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3902    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3903    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
3904    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
3905    matrix, and [DF] as another SeqAIJ matrix.
3906 
3907    When d_nz, o_nz parameters are specified, d_nz storage elements are
3908    allocated for every row of the local diagonal submatrix, and o_nz
3909    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
3910    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3911    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3912    In this case, the values of d_nz,o_nz are:
3913 .vb
3914      proc0 : d_nz = 2, o_nz = 2
3915      proc1 : d_nz = 3, o_nz = 2
3916      proc2 : d_nz = 1, o_nz = 4
3917 .ve
3918    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3919    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3920    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3921    34 values.
3922 
3923    When d_nnz, o_nnz parameters are specified, the storage is specified
3924    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3925    In the above case the values for d_nnz,o_nnz are:
3926 .vb
3927      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3928      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3929      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3930 .ve
3931    Here the space allocated is the sum of all the above values, i.e., 34, and
3932    hence the preallocation is exact.
3933 
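   As a minimal sketch (assuming the 8x8 matrix B above has been created with
   MatCreate() on 3 processes, sized with MatSetSizes(), and given type MATMPIAIJ),
   proc0 could preallocate its three rows exactly with:

.vb
      PetscInt d_nnz[] = {2,2,2};
      PetscInt o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve

   The 0 values passed for d_nz and o_nz are ignored because the *_nnz arrays are given.
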
3934    Level: intermediate
3935 
3936 .keywords: matrix, aij, compressed row, sparse, parallel
3937 
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3939           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3940 @*/
3941 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3942 {
3943   PetscErrorCode ierr;
3944 
3945   PetscFunctionBegin;
3946   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3947   PetscValidType(B,1);
3948   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3949   PetscFunctionReturn(0);
3950 }
3951 
3952 #undef __FUNCT__
3953 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3954 /*@
3955      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local
3956          rows in standard CSR format.
3957 
3958    Collective on MPI_Comm
3959 
3960    Input Parameters:
3961 +  comm - MPI communicator
3962 .  m - number of local rows (Cannot be PETSC_DECIDE)
3963 .  n - This value should be the same as the local size used in creating the
3964        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3965        calculated if N is given). For square matrices n is almost always m.
3966 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3967 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3968 .   i - row indices
3969 .   j - column indices
3970 -   a - matrix values
3971 
3972    Output Parameter:
3973 .   mat - the matrix
3974 
3975    Level: intermediate
3976 
3977    Notes:
3978        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3979      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3980      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3981 
3982        The i and j indices are 0 based, and the i indices are offsets into the local j array.
3983 
3984        The format used for the sparse matrix input is equivalent to a
3985     row-major ordering, i.e., for the following matrix, the input data expected is
3986     as shown:
3987 
3988         1 0 0
3989         2 0 3     P0
3990        -------
3991         4 5 6     P1
3992 
3993      Process0 [P0]: rows_owned=[0,1]
3994         i =  {0,1,3}  [size = nrow+1  = 2+1]
3995         j =  {0,0,2}  [size = nz = 3]
3996         v =  {1,2,3}  [size = nz = 3]
3997 
3998      Process1 [P1]: rows_owned=[2]
3999         i =  {0,3}    [size = nrow+1  = 1+1]
4000         j =  {0,1,2}  [size = nz = 3]
4001         v =  {4,5,6}  [size = nz = 3]
4002 
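     As a minimal sketch, process 1 above would create its share of this 3x3 matrix
     with the call below; process 0 makes the analogous call with m = 2 and its own
     i, j, and v arrays:

.vb
      PetscInt    i[] = {0,3};
      PetscInt    j[] = {0,1,2};
      PetscScalar v[] = {4,5,6};
      Mat         A;
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,1,PETSC_DECIDE,3,3,i,j,v,&A);CHKERRQ(ierr);
.ve
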
4003 .keywords: matrix, aij, compressed row, sparse, parallel
4004 
4005 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4006           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4007 @*/
4008 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4009 {
4010   PetscErrorCode ierr;
4011 
4012   PetscFunctionBegin;
4013   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4014   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4015   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4016   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4017   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4018   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4019   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4020   PetscFunctionReturn(0);
4021 }
4022 
4023 #undef __FUNCT__
4024 #define __FUNCT__ "MatCreateAIJ"
4025 /*@C
4026    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4027    (the default parallel PETSc format).  For good matrix assembly performance
4028    the user should preallocate the matrix storage by setting the parameters
4029    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4030    performance can be increased by more than a factor of 50.
4031 
4032    Collective on MPI_Comm
4033 
4034    Input Parameters:
4035 +  comm - MPI communicator
4036 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4037            This value should be the same as the local size used in creating the
4038            y vector for the matrix-vector product y = Ax.
4039 .  n - This value should be the same as the local size used in creating the
4040        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4041        calculated if N is given). For square matrices n is almost always m.
4042 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4043 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4044 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4045            (same value is used for all local rows)
4046 .  d_nnz - array containing the number of nonzeros in the various rows of the
4047            DIAGONAL portion of the local submatrix (possibly different for each row)
4048            or NULL, if d_nz is used to specify the nonzero structure.
4049            The size of this array is equal to the number of local rows, i.e., 'm'.
4050 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4051            submatrix (same value is used for all local rows).
4052 -  o_nnz - array containing the number of nonzeros in the various rows of the
4053            OFF-DIAGONAL portion of the local submatrix (possibly different for
4054            each row) or NULL, if o_nz is used to specify the nonzero
4055            structure. The size of this array is equal to the number
4056            of local rows, i.e., 'm'.
4057 
4058    Output Parameter:
4059 .  A - the matrix
4060 
4061    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4062    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4063    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4064 
4065    Notes:
4066    If the *_nnz parameter is given, then the *_nz parameter is ignored.
4067 
4068    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4069    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4070    storage requirements for this matrix.
4071 
4072    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4073    processor then it must be used on all processors that share the object for
4074    that argument.
4075 
4076    The user MUST specify either the local or global matrix dimensions
4077    (possibly both).
4078 
4079    The parallel matrix is partitioned across processors such that the
4080    first m0 rows belong to process 0, the next m1 rows belong to
4081    process 1, the next m2 rows belong to process 2, etc., where
4082    m0,m1,m2,... are the values of the input parameter 'm', i.e., each processor stores
4083    values corresponding to an [m x N] submatrix.
4084 
4085    The columns are logically partitioned with the n0 columns belonging
4086    to the 0th partition, the next n1 columns belonging to the next
4087    partition, etc., where n0,n1,n2,... are the values of the input parameter 'n'.
4088 
4089    The DIAGONAL portion of the local submatrix on any given processor
4090    is the submatrix corresponding to the rows and columns m,n
4091    owned by the given processor, i.e., the diagonal submatrix on
4092    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4093    etc. The remaining portion of the local submatrix [m x (N-n)]
4094    constitutes the OFF-DIAGONAL portion. The example below better
4095    illustrates this concept.
4096 
4097    For a square global matrix we define each processor's diagonal portion
4098    to be its local rows and the corresponding columns (a square submatrix);
4099    each processor's off-diagonal portion encompasses the remainder of the
4100    local matrix (a rectangular submatrix).
4101 
4102    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4103 
4104    When calling this routine with a single process communicator, a matrix of
4105    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4106    type of communicator, use the construction mechanism:
4107 .vb
        MatCreate(...,&A);
        MatSetType(A,MATMPIAIJ);
        MatSetSizes(A, m,n,M,N);
        MatMPIAIJSetPreallocation(A,...);
.ve
4108 
4109    By default, this format uses inodes (identical nodes) when possible.
4110    We search for consecutive rows with the same nonzero structure, thereby
4111    reusing matrix information to achieve increased efficiency.
4112 
4113    Options Database Keys:
4114 +  -mat_no_inode  - Do not use inodes
4115 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4116 -  -mat_aij_oneindex - Internally use indexing starting at 1
4117         rather than 0.  Note that when calling MatSetValues(),
4118         the user still MUST index entries starting at 0!
4119 
4120 
4121    Example usage:
4122 
4123    Consider the following 8x8 matrix with 34 non-zero values that is
4124    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4125    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4126    as follows:
4127 
4128 .vb
4129             1  2  0  |  0  3  0  |  0  4
4130     Proc0   0  5  6  |  7  0  0  |  8  0
4131             9  0 10  | 11  0  0  | 12  0
4132     -------------------------------------
4133            13  0 14  | 15 16 17  |  0  0
4134     Proc1   0 18  0  | 19 20 21  |  0  0
4135             0  0  0  | 22 23  0  | 24  0
4136     -------------------------------------
4137     Proc2  25 26 27  |  0  0 28  | 29  0
4138            30  0  0  | 31 32 33  |  0 34
4139 .ve
4140 
4141    This can be represented as a collection of submatrices as:
4142 
4143 .vb
4144       A B C
4145       D E F
4146       G H I
4147 .ve
4148 
4149    The submatrices A,B,C are owned by proc0, D,E,F are
4150    owned by proc1, and G,H,I are owned by proc2.
4151 
4152    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4153    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4154    The 'M','N' parameters are 8,8, and have the same values on all procs.
4155 
4156    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4157    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4158    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4159    Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
4160    part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ
4161    matrix, and [DF] as another SeqAIJ matrix.
4162 
4163    When d_nz, o_nz parameters are specified, d_nz storage elements are
4164    allocated for every row of the local diagonal submatrix, and o_nz
4165    storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4166    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4167    the local rows of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4168    In this case, the values of d_nz,o_nz are:
4169 .vb
4170      proc0 : d_nz = 2, o_nz = 2
4171      proc1 : d_nz = 3, o_nz = 2
4172      proc2 : d_nz = 1, o_nz = 4
4173 .ve
4174    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4175    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4176    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4177    34 values.
4178 
4179    When d_nnz, o_nnz parameters are specified, the storage is specified
4180    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4181    In the above case the values for d_nnz,o_nnz are:
4182 .vb
4183      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4184      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4185      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4186 .ve
4187    Here the space allocated is the sum of all the above values, i.e., 34, and
4188    hence the preallocation is exact.
4189 
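   As a minimal sketch (assuming m, d_nnz, and o_nnz hold each process's values
   from the tables above; here n = m since the column partition matches the row
   partition), the 8x8 matrix could be created with:

.vb
      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
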
4190    Level: intermediate
4191 
4192 .keywords: matrix, aij, compressed row, sparse, parallel
4193 
4194 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4195           MPIAIJ, MatCreateMPIAIJWithArrays()
4196 @*/
4197 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4198 {
4199   PetscErrorCode ierr;
4200   PetscMPIInt    size;
4201 
4202   PetscFunctionBegin;
4203   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4204   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4205   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4206   if (size > 1) {
4207     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4208     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4209   } else {
4210     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4211     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4212   }
4213   PetscFunctionReturn(0);
4214 }
4215 
4216 #undef __FUNCT__
4217 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
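/* Gives access to the diagonal block (Ad), the off-diagonal block (Ao), and the map
   (colmap) from the local column indices of Ao to global column indices; any of the
   output pointers may be NULL if that piece is not needed. */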
4218 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4219 {
4220   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4221 
4222   PetscFunctionBegin;
4223   if (Ad)     *Ad     = a->A;
4224   if (Ao)     *Ao     = a->B;
4225   if (colmap) *colmap = a->garray;
4226   PetscFunctionReturn(0);
4227 }
4228 
4229 #undef __FUNCT__
4230 #define __FUNCT__ "MatSetColoring_MPIAIJ"
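/* Propagates a coloring to the two sequential blocks: the diagonal block receives its
   colors directly (global case) or through the global-to-local mapping (ghosted case),
   while the off-diagonal block's colors are looked up through garray. */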
4231 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4232 {
4233   PetscErrorCode ierr;
4234   PetscInt       i;
4235   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4236 
4237   PetscFunctionBegin;
4238   if (coloring->ctype == IS_COLORING_GLOBAL) {
4239     ISColoringValue *allcolors,*colors;
4240     ISColoring      ocoloring;
4241 
4242     /* set coloring for diagonal portion */
4243     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4244 
4245     /* set coloring for off-diagonal portion */
4246     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4247     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4248     for (i=0; i<a->B->cmap->n; i++) {
4249       colors[i] = allcolors[a->garray[i]];
4250     }
4251     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4252     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4253     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4254     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4255   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4256     ISColoringValue *colors;
4257     PetscInt        *larray;
4258     ISColoring      ocoloring;
4259 
4260     /* set coloring for diagonal portion */
4261     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4262     for (i=0; i<a->A->cmap->n; i++) {
4263       larray[i] = i + A->cmap->rstart;
4264     }
4265     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4266     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4267     for (i=0; i<a->A->cmap->n; i++) {
4268       colors[i] = coloring->colors[larray[i]];
4269     }
4270     ierr = PetscFree(larray);CHKERRQ(ierr);
4271     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4272     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4273     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4274 
4275     /* set coloring for off-diagonal portion */
4276     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4277     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4278     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4279     for (i=0; i<a->B->cmap->n; i++) {
4280       colors[i] = coloring->colors[larray[i]];
4281     }
4282     ierr = PetscFree(larray);CHKERRQ(ierr);
4283     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4284     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4285     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4286   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4287   PetscFunctionReturn(0);
4288 }
4289 
4290 #undef __FUNCT__
4291 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4292 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4293 {
4294   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4295   PetscErrorCode ierr;
4296 
4297   PetscFunctionBegin;
4298   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4299   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4300   PetscFunctionReturn(0);
4301 }
4302 
4303 #undef __FUNCT__
4304 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4305 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4306 {
4307   PetscErrorCode ierr;
4308   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4309   PetscInt       *indx;
4310 
4311   PetscFunctionBegin;
4312   /* This routine will ONLY return MPIAIJ type matrix */
4313   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4314   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4315   if (n == PETSC_DECIDE) {
4316     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4317   }
4318   /* Check sum(n) = N */
4319   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4320   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4321 
4322   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4323   rstart -= m;
4324 
4325   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4326   for (i=0; i<m; i++) {
4327     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4328     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4329     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4330   }
4331 
4332   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4333   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4334   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4335   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4336   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4337   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4338   PetscFunctionReturn(0);
4339 }
4340 
4341 #undef __FUNCT__
4342 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4343 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4344 {
4345   PetscErrorCode ierr;
4346   PetscInt       m,N,i,rstart,nnz,Ii;
4347   PetscInt       *indx;
4348   PetscScalar    *values;
4349 
4350   PetscFunctionBegin;
4351   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4352   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4353   for (i=0; i<m; i++) {
4354     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4355     Ii   = i + rstart;
4356     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4357     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4358   }
4359   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4360   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4361   PetscFunctionReturn(0);
4362 }
4363 
4364 #undef __FUNCT__
4365 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4366 /*@
4367       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4368                  matrices from each processor
4369 
4370     Collective on MPI_Comm
4371 
4372    Input Parameters:
4373 +    comm - the communicator the parallel matrix will live on
4374 .    inmat - the input sequential matrix (one per process)
4375 .    n - number of local columns (or PETSC_DECIDE)
4376 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4377 
4378    Output Parameter:
4379 .    outmat - the parallel matrix generated
4380 
4381     Level: advanced
4382 
4383    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4384 
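   A minimal usage sketch (seqA here is a hypothetical SeqAIJ matrix held by each
   process, all with the same number of columns):

.vb
      Mat C;
      ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve
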
4385 @*/
4386 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4387 {
4388   PetscErrorCode ierr;
4389   PetscMPIInt    size;
4390 
4391   PetscFunctionBegin;
4392   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4393   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4394   if (size == 1) {
4395     if (scall == MAT_INITIAL_MATRIX) {
4396       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4397     } else {
4398       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4399     }
4400   } else {
4401     if (scall == MAT_INITIAL_MATRIX) {
4402       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4403     }
4404     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4405   }
4406   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4407   PetscFunctionReturn(0);
4408 }
4409 
4410 #undef __FUNCT__
4411 #define __FUNCT__ "MatFileSplit"
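/* Writes each process's local rows of A, expanded into an m x N sequential matrix, to a
   separate binary file named <outfile>.<rank>. */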
4412 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4413 {
4414   PetscErrorCode    ierr;
4415   PetscMPIInt       rank;
4416   PetscInt          m,N,i,rstart,nnz;
4417   size_t            len;
4418   const PetscInt    *indx;
4419   PetscViewer       out;
4420   char              *name;
4421   Mat               B;
4422   const PetscScalar *values;
4423 
4424   PetscFunctionBegin;
4425   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4426   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4427   /* Should this be the type of the diagonal block of A? */
4428   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4429   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4430   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4431   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4432   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4433   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4434   for (i=0; i<m; i++) {
4435     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4436     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4437     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4438   }
4439   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4440   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4441 
4442   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4443   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4444   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4445   sprintf(name,"%s.%d",outfile,rank);
4446   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4447   ierr = PetscFree(name);CHKERRQ(ierr);
4448   ierr = MatView(B,out);CHKERRQ(ierr);
4449   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4450   ierr = MatDestroy(&B);CHKERRQ(ierr);
4451   PetscFunctionReturn(0);
4452 }
4453 
4454 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4455 #undef __FUNCT__
4456 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
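/* Destructor for matrices built by MatCreateMPIAIJSumSeqAIJ(): releases the attached
   Mat_Merge_SeqsToMPI support structure (if present) before the usual MPIAIJ destroy. */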
4457 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4458 {
4459   PetscErrorCode      ierr;
4460   Mat_Merge_SeqsToMPI *merge;
4461   PetscContainer      container;
4462 
4463   PetscFunctionBegin;
4464   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4465   if (container) {
4466     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4467     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4468     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4469     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4470     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4471     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4472     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4473     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4474     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4479     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4480     ierr = PetscFree(merge);CHKERRQ(ierr);
4481     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4482   }
4483   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4484   PetscFunctionReturn(0);
4485 }
4486 
4487 #include <../src/mat/utils/freespace.h>
4488 #include <petscbt.h>
4489 
4490 #undef __FUNCT__
4491 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4492 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4493 {
4494   PetscErrorCode      ierr;
4495   MPI_Comm            comm;
4496   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4497   PetscMPIInt         size,rank,taga,*len_s;
4498   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4499   PetscInt            proc,m;
4500   PetscInt            **buf_ri,**buf_rj;
4501   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4502   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4503   MPI_Request         *s_waits,*r_waits;
4504   MPI_Status          *status;
4505   MatScalar           *aa=a->a;
4506   MatScalar           **abuf_r,*ba_i;
4507   Mat_Merge_SeqsToMPI *merge;
4508   PetscContainer      container;
4509 
4510   PetscFunctionBegin;
4511   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4512   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4513 
4514   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4515   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4516 
4517   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4518   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4519 
4520   bi     = merge->bi;
4521   bj     = merge->bj;
4522   buf_ri = merge->buf_ri;
4523   buf_rj = merge->buf_rj;
4524 
4525   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4526   owners = merge->rowmap->range;
4527   len_s  = merge->len_s;
4528 
4529   /* send and recv matrix values */
4530   /*-----------------------------*/
4531   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4532   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4533 
4534   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4535   for (proc=0,k=0; proc<size; proc++) {
4536     if (!len_s[proc]) continue;
4537     i    = owners[proc];
4538     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4539     k++;
4540   }
4541 
4542   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4543   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4544   ierr = PetscFree(status);CHKERRQ(ierr);
4545 
4546   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4547   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4548 
4549   /* insert mat values of mpimat */
4550   /*----------------------------*/
4551   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4552   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4553 
4554   for (k=0; k<merge->nrecv; k++) {
4555     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4556     nrows       = *(buf_ri_k[k]);
4557     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of the k-th received i-structure */
4558     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4559   }
4560 
4561   /* set values of ba */
4562   m = merge->rowmap->n;
4563   for (i=0; i<m; i++) {
4564     arow = owners[rank] + i;
4565     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4566     bnzi = bi[i+1] - bi[i];
4567     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4568 
4569     /* add local non-zero vals of this proc's seqmat into ba */
4570     anzi   = ai[arow+1] - ai[arow];
4571     aj     = a->j + ai[arow];
4572     aa     = a->a + ai[arow];
4573     nextaj = 0;
4574     for (j=0; nextaj<anzi; j++) {
4575       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4576         ba_i[j] += aa[nextaj++];
4577       }
4578     }
4579 
4580     /* add received vals into ba */
4581     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4582       /* i-th row */
4583       if (i == *nextrow[k]) {
4584         anzi   = *(nextai[k]+1) - *nextai[k];
4585         aj     = buf_rj[k] + *(nextai[k]);
4586         aa     = abuf_r[k] + *(nextai[k]);
4587         nextaj = 0;
4588         for (j=0; nextaj<anzi; j++) {
4589           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4590             ba_i[j] += aa[nextaj++];
4591           }
4592         }
4593         nextrow[k]++; nextai[k]++;
4594       }
4595     }
4596     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4597   }
4598   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4599   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4600 
4601   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4602   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4603   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4604   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4605   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4606   PetscFunctionReturn(0);
4607 }
4608 
4609 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4610 
4611 #undef __FUNCT__
4612 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4613 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4614 {
4615   PetscErrorCode      ierr;
4616   Mat                 B_mpi;
4617   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4618   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4619   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4620   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4621   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4622   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4623   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4624   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4625   MPI_Status          *status;
4626   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4627   PetscBT             lnkbt;
4628   Mat_Merge_SeqsToMPI *merge;
4629   PetscContainer      container;
4630 
4631   PetscFunctionBegin;
4632   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4633 
4634   /* make sure it is a PETSc comm */
4635   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4636   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4637   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4638 
4639   ierr = PetscNew(&merge);CHKERRQ(ierr);
4640   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4641 
4642   /* determine row ownership */
4643   /*---------------------------------------------------------*/
4644   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4645   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4646   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4647   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4648   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4649   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4650   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4651 
4652   m      = merge->rowmap->n;
4653   owners = merge->rowmap->range;
4654 
4655   /* determine the number of messages to send, their lengths */
4656   /*---------------------------------------------------------*/
4657   len_s = merge->len_s;
4658 
4659   len          = 0; /* length of buf_si[] */
4660   merge->nsend = 0;
4661   for (proc=0; proc<size; proc++) {
4662     len_si[proc] = 0;
4663     if (proc == rank) {
4664       len_s[proc] = 0;
4665     } else {
4666       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4667       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4668     }
4669     if (len_s[proc]) {
4670       merge->nsend++;
4671       nrows = 0;
4672       for (i=owners[proc]; i<owners[proc+1]; i++) {
4673         if (ai[i+1] > ai[i]) nrows++;
4674       }
4675       len_si[proc] = 2*(nrows+1);
4676       len         += len_si[proc];
4677     }
4678   }
4679 
4680   /* determine the number and length of messages to receive for ij-structure */
4681   /*-------------------------------------------------------------------------*/
4682   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4683   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4684 
4685   /* post the Irecv of j-structure */
4686   /*-------------------------------*/
4687   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4688   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4689 
4690   /* post the Isend of j-structure */
4691   /*--------------------------------*/
4692   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4693 
4694   for (proc=0, k=0; proc<size; proc++) {
4695     if (!len_s[proc]) continue;
4696     i    = owners[proc];
4697     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4698     k++;
4699   }
4700 
4701   /* receives and sends of j-structure are complete */
4702   /*------------------------------------------------*/
4703   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4704   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4705 
4706   /* send and recv i-structure */
4707   /*---------------------------*/
4708   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4709   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4710 
4711   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4712   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4713   for (proc=0,k=0; proc<size; proc++) {
4714     if (!len_s[proc]) continue;
4715     /* form outgoing message for i-structure:
4716          buf_si[0]:                 nrows to be sent
4717                [1:nrows]:           row index (global)
4718                [nrows+1:2*nrows+1]: i-structure index
4719     */
4720     /*-------------------------------------------*/
4721     nrows       = len_si[proc]/2 - 1;
4722     buf_si_i    = buf_si + nrows+1;
4723     buf_si[0]   = nrows;
4724     buf_si_i[0] = 0;
4725     nrows       = 0;
4726     for (i=owners[proc]; i<owners[proc+1]; i++) {
4727       anzi = ai[i+1] - ai[i];
4728       if (anzi) {
4729         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4730         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4731         nrows++;
4732       }
4733     }
4734     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4735     k++;
4736     buf_si += len_si[proc];
4737   }
4738 
4739   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4740   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4741 
4742   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4743   for (i=0; i<merge->nrecv; i++) {
4744     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4745   }
4746 
4747   ierr = PetscFree(len_si);CHKERRQ(ierr);
4748   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4749   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4750   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4751   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4752   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4753   ierr = PetscFree(status);CHKERRQ(ierr);
4754 
4755   /* compute a local seq matrix in each processor */
4756   /*----------------------------------------------*/
4757   /* allocate bi array and free space for accumulating nonzero column info */
4758   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4759   bi[0] = 0;
4760 
4761   /* create and initialize a linked list */
4762   nlnk = N+1;
4763   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4764 
4765   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4766   len  = ai[owners[rank+1]] - ai[owners[rank]];
4767   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4768 
4769   current_space = free_space;
4770 
4771   /* determine symbolic info for each local row */
4772   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4773 
4774   for (k=0; k<merge->nrecv; k++) {
4775     buf_ri_k[k] = buf_ri[k]; /* beginning of the k-th received i-structure */
4776     nrows       = *buf_ri_k[k];
4777     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of the k-th received i-structure */
4778     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
4779   }
4780 
4781   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4782   len  = 0;
4783   for (i=0; i<m; i++) {
4784     bnzi = 0;
4785     /* add local non-zero cols of this proc's seqmat into lnk */
4786     arow  = owners[rank] + i;
4787     anzi  = ai[arow+1] - ai[arow];
4788     aj    = a->j + ai[arow];
4789     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4790     bnzi += nlnk;
4791     /* add received col data into lnk */
4792     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4793       if (i == *nextrow[k]) { /* i-th row */
4794         anzi  = *(nextai[k]+1) - *nextai[k];
4795         aj    = buf_rj[k] + *nextai[k];
4796         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4797         bnzi += nlnk;
4798         nextrow[k]++; nextai[k]++;
4799       }
4800     }
4801     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4802 
4803     /* if free space is not available, make more free space */
4804     if (current_space->local_remaining<bnzi) {
4805       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4806       nspacedouble++;
4807     }
4808     /* copy data into free space, then initialize lnk */
4809     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4810     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4811 
4812     current_space->array           += bnzi;
4813     current_space->local_used      += bnzi;
4814     current_space->local_remaining -= bnzi;
4815 
4816     bi[i+1] = bi[i] + bnzi;
4817   }
4818 
4819   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4820 
4821   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4822   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4823   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4824 
4825   /* create symbolic parallel matrix B_mpi */
4826   /*---------------------------------------*/
4827   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4828   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4829   if (n==PETSC_DECIDE) {
4830     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4831   } else {
4832     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4833   }
4834   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4835   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4836   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4837   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4838   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4839 
4840   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4841   B_mpi->assembled    = PETSC_FALSE;
4842   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4843   merge->bi           = bi;
4844   merge->bj           = bj;
4845   merge->buf_ri       = buf_ri;
4846   merge->buf_rj       = buf_rj;
4847   merge->coi          = NULL;
4848   merge->coj          = NULL;
4849   merge->owners_co    = NULL;
4850 
4851   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4852 
4853   /* attach the supporting struct to B_mpi for reuse */
4854   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4855   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4856   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4857   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4858   *mpimat = B_mpi;
4859 
4860   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4861   PetscFunctionReturn(0);
4862 }
4863 
4864 #undef __FUNCT__
4865 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4866 /*@C
4867       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4868                  matrices from each processor
4869 
4870     Collective on MPI_Comm
4871 
4872    Input Parameters:
4873 +    comm - the communicator the parallel matrix will live on
4874 .    seqmat - the input sequential matrix (one per process)
4875 .    m - number of local rows (or PETSC_DECIDE)
4876 .    n - number of local columns (or PETSC_DECIDE)
4877 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4878 
4879    Output Parameter:
4880 .    mpimat - the parallel matrix generated
4881 
4882     Level: advanced
4883 
4884    Notes:
4885      The dimensions of the sequential matrix in each processor MUST be the same.
4886      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4887      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
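
   A minimal usage sketch (seqA here is a hypothetical SeqAIJ matrix held by each
   process, all with identical dimensions):

.vb
      Mat C;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve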
4888 @*/
4889 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4890 {
4891   PetscErrorCode ierr;
4892   PetscMPIInt    size;
4893 
4894   PetscFunctionBegin;
4895   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4896   if (size == 1) {
4897     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4898     if (scall == MAT_INITIAL_MATRIX) {
4899       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4900     } else {
4901       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4902     }
4903     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4904     PetscFunctionReturn(0);
4905   }
4906   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4907   if (scall == MAT_INITIAL_MATRIX) {
4908     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4909   }
4910   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4911   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4912   PetscFunctionReturn(0);
4913 }
4914 
4915 #undef __FUNCT__
4916 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4917 /*@
4918      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4919           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4920           with MatGetSize().
4921 
4922     Not Collective
4923 
4924    Input Parameters:
4925 +    A - the matrix
4926 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4927 
4928    Output Parameter:
4929 .    A_loc - the local sequential matrix generated
4930 
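    A minimal usage sketch; the caller uses A_loc as an ordinary sequential matrix
    and destroys it when done:

.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
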
4931     Level: developer
4932 
4933 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4934 
4935 @*/
4936 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4937 {
4938   PetscErrorCode ierr;
4939   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4940   Mat_SeqAIJ     *mat,*a,*b;
4941   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4942   MatScalar      *aa,*ba,*cam;
4943   PetscScalar    *ca;
4944   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4945   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4946   PetscBool      match;
4947   MPI_Comm       comm;
4948   PetscMPIInt    size;
4949 
4950   PetscFunctionBegin;
4951   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4952   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4953   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4954   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4955   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4956 
4957   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4958   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4959   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4960   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4961   aa = a->a; ba = b->a;
4962   if (scall == MAT_INITIAL_MATRIX) {
4963     if (size == 1) {
4964       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4965       PetscFunctionReturn(0);
4966     }
4967 
4968     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4969     ci[0] = 0;
4970     for (i=0; i<am; i++) {
4971       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4972     }
4973     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4974     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4975     k    = 0;
4976     for (i=0; i<am; i++) {
4977       ncols_o = bi[i+1] - bi[i];
4978       ncols_d = ai[i+1] - ai[i];
4979       /* off-diagonal portion of A */
4980       for (jo=0; jo<ncols_o; jo++) {
4981         col = cmap[*bj];
4982         if (col >= cstart) break;
4983         cj[k]   = col; bj++;
4984         ca[k++] = *ba++;
4985       }
4986       /* diagonal portion of A */
4987       for (j=0; j<ncols_d; j++) {
4988         cj[k]   = cstart + *aj++;
4989         ca[k++] = *aa++;
4990       }
4991       /* off-diagonal portion of A */
4992       for (j=jo; j<ncols_o; j++) {
4993         cj[k]   = cmap[*bj++];
4994         ca[k++] = *ba++;
4995       }
4996     }
4997     /* put together the new matrix */
4998     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4999     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5000     /* Since these are PETSc arrays, change flags to free them as necessary. */
5001     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5002     mat->free_a  = PETSC_TRUE;
5003     mat->free_ij = PETSC_TRUE;
5004     mat->nonew   = 0;
5005   } else if (scall == MAT_REUSE_MATRIX) {
5006     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5007     ci = mat->i; cj = mat->j; cam = mat->a;
5008     for (i=0; i<am; i++) {
5009       /* off-diagonal portion of A */
5010       ncols_o = bi[i+1] - bi[i];
5011       for (jo=0; jo<ncols_o; jo++) {
5012         col = cmap[*bj];
5013         if (col >= cstart) break;
5014         *cam++ = *ba++; bj++;
5015       }
5016       /* diagonal portion of A */
5017       ncols_d = ai[i+1] - ai[i];
5018       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5019       /* off-diagonal portion of A */
5020       for (j=jo; j<ncols_o; j++) {
5021         *cam++ = *ba++; bj++;
5022       }
5023     }
5024   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5025   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5026   PetscFunctionReturn(0);
5027 }
5028 
5029 #undef __FUNCT__
5030 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5031 /*@C
5032      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5033 
5034     Not Collective
5035 
5036    Input Parameters:
5037 +    A - the matrix
5038 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5039 -    row, col - index sets of rows and columns to extract (or NULL)
5040 
5041    Output Parameter:
5042 .    A_loc - the local sequential matrix generated
5043 
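    A minimal usage sketch (passing NULL for both index sets so that all local rows
    and all nonzero columns are taken):

.vb
      Mat A_loc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
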
5044     Level: developer
5045 
5046 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5047 
5048 @*/
5049 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5050 {
5051   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5052   PetscErrorCode ierr;
5053   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5054   IS             isrowa,iscola;
5055   Mat            *aloc;
5056   PetscBool      match;
5057 
5058   PetscFunctionBegin;
5059   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5060   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5061   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5062   if (!row) {
5063     start = A->rmap->rstart; end = A->rmap->rend;
5064     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5065   } else {
5066     isrowa = *row;
5067   }
5068   if (!col) {
5069     start = A->cmap->rstart;
5070     cmap  = a->garray;
5071     nzA   = a->A->cmap->n;
5072     nzB   = a->B->cmap->n;
5073     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5074     ncols = 0;
5075     for (i=0; i<nzB; i++) {
5076       if (cmap[i] < start) idx[ncols++] = cmap[i];
5077       else break;
5078     }
5079     imark = i;
5080     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5081     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5082     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5083   } else {
5084     iscola = *col;
5085   }
5086   if (scall != MAT_INITIAL_MATRIX) {
5087     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5088     aloc[0] = *A_loc;
5089   }
5090   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5091   *A_loc = aloc[0];
5092   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5093   if (!row) {
5094     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5095   }
5096   if (!col) {
5097     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5098   }
5099   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5100   PetscFunctionReturn(0);
5101 }
5102 
5103 #undef __FUNCT__
5104 #define __FUNCT__ "MatGetBrowsOfAcols"
5105 /*@C
5106     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5107 
5108     Collective on Mat
5109 
5110    Input Parameters:
5111 +    A,B - the matrices in mpiaij format
5112 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5113 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5114 
5115    Output Parameter:
5116 +    rowb, colb - index sets of rows and columns of B to extract
5117 -    B_seq - the sequential matrix generated
5118 
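    A minimal usage sketch for the initial call; the index sets created here must be
    kept and passed back for a subsequent MAT_REUSE_MATRIX call:

.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
.ve
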
    Level: developer

@*/
PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D,%D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of the local part of A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameters:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

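   Notes:
   The needed rows of B are exchanged in three phases, each using nonblocking
   MPI sends and receives over the communication pattern of a->Mvctx: first the
   row lengths (i-array), then the column indices (j-array), and finally the
   numerical values (a-array).  With MAT_REUSE_MATRIX the i- and j-structures
   saved in startsj_s, startsj_r, and bufa_ptr are reused and only the a-array
   is communicated again.  A typical call sequence (illustrative):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... the values of B change, its structure does not ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
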
    Level: developer

*/
PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
{
  VecScatter_MPI_General *gen_to,*gen_from;
  PetscErrorCode         ierr;
  Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ             *b_oth;
  VecScatter             ctx =a->Mvctx;
  MPI_Comm               comm;
  PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
  PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
  PetscScalar            *rvalues,*svalues;
  MatScalar              *b_otha,*bufa,*bufA;
  PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
  MPI_Request            *rwaits = NULL,*swaits = NULL;
  MPI_Status             *sstatus,rstatus;
  PetscMPIInt            jj,size;
  PetscInt               *cols,sbs,rbs;
  PetscScalar            *vals;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D,%D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  gen_to   = (VecScatter_MPI_General*)ctx->todata;
  gen_from = (VecScatter_MPI_General*)ctx->fromdata;
  rvalues  = gen_from->values; /* holds the length of receiving row */
  svalues  = gen_to->values;   /* holds the length of sending row */
  nrecvs   = gen_from->n;
  nsends   = gen_to->n;

  ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
  srow    = gen_to->indices;    /* local row index to be sent */
  sstarts = gen_to->starts;
  sprocs  = gen_to->procs;
  sstatus = gen_to->sstatus;
  sbs     = gen_to->bs;
  rstarts = gen_from->starts;
  rprocs  = gen_from->procs;
  rbs     = gen_from->bs;

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array */
    /*---------*/
    /*  post receives */
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
      ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message */
    ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    k           = 0;
    for (i=0; i<nsends; i++) {
      rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
      nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
      for (j=0; j<nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l=0; l<sbs; l++) {
          ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */

          rowlen[j*sbs+l] = ncols;

          len += ncols;
          ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
        }
        k++;
      }
      ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        len        += rowlen[j]; k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /*  post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank];  /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Unsupported MatReuse value");

  /* a-array */
  /*---------*/
  /*  post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank];  /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself, not the (possibly NULL) bufa_ptr argument */
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetCommunicationStructs"
/*@C
  MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

  Not Collective

  Input Parameter:
. A - The matrix in MPIAIJ format

  Output Parameters:
+ lvec - The local vector holding off-process values from the argument to a matrix-vector product
. colmap - A map from global column index to local index into lvec
- multScatter - A scatter from the argument of a matrix-vector product to lvec

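  Notes:
  The returned objects are owned by the matrix and must not be destroyed by the caller.
  A usage sketch (illustrative):
.vb
    Vec        lvec;
    VecScatter scat;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
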
  Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
/*
    Computes C = A*B as (B'*A')' since computing A*B directly is untenable

                n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

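    That is, C = A*B is formed as C = (B^T * A^T)^T via the MatTranspose() and
    MatMatMult() calls in the function body below.
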
*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
#undef __FUNCT__
#define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_MUMPS)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_PASTIX)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
#endif
#if defined(PETSC_HAVE_CLIQUE)
PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
#endif

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

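  Notes:
  A minimal creation sketch (illustrative; comm, m, n, M, N, d_nz, and o_nz are placeholders):
.vb
  Mat A;
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
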
  Level: beginner

.seealso: MatCreateAIJ()
M*/

#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIAIJ"
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices for "diagonal" portion of matrix
.   j - column indices
.   a - matrix values
.   oi - row indices for "off-diagonal" portion of matrix
.   oj - column indices
-   oa - matrix values

   Output Parameter:
.   mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

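       A construction sketch (illustrative only): each process contributes one row and
       one column, the diagonal block holds a single entry, the off-diagonal block is
       empty, and the arrays must outlive the matrix:
.vb
       PetscInt    i[2]  = {0,1}, j[1]  = {0};   /* diagonal block: one entry */
       PetscScalar a[1]  = {2.0};
       PetscInt    oi[2] = {0,0}, oj[1] = {0};   /* off-diagonal block: empty */
       PetscScalar oa[1] = {0.0};
       Mat         A;
       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,
                                             i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
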
.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
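    This bypasses the generated Fortran interface and inlines the MatSetValues()
    logic for MPIAIJ matrices.  Because the routine must return void to be
    callable from Fortran, the error-checking macros are redefined below to
    abort instead of returning an error code.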
*/
#include <petsc-private/fortranimpl.h>

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#undef __FUNCT__
#define __FUNCT__ "matsetvaluesmpiaij_"
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required by the MatSetValues_SeqAIJ_A_Private() and MatSetValues_SeqAIJ_B_Private() macros */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  =  in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a; /* must be refreshed before ap2 is recomputed, since MatDisAssemble_MPIAIJ() reallocated b->a */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
