xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision a34163a40bd715823141060f8cdc39ecd9525219)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The type also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine number diagonal and off-diagonal counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine number diagonal and off-diagonal counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array but is fast to acess.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
/*
   MatSetValues_SeqAIJ_A_Private - Sets or adds one value in row `row`, column `col`
   of the block described by the caller's A-side working variables.

   The macro is not self-contained. It reads and updates these variables of the
   enclosing function: rp1/ap1 (column indices and values of the row), nrow1, rmax1,
   low1, high1, lastcol1, the scratch integers t, _i, ii and N, the settings nonew
   and ignorezeroentries, and A, a, am, aa, ai, aj, aimax and ailen. It also jumps
   to the label a_noinsert that it defines itself, so it can be expanded only once
   per function.

   Steps: narrow [low1,high1) by bisection, scan the remaining window for `col`,
   and update the value in place if the column is found. Otherwise, subject to
   ignorezeroentries and nonew, grow the row through MatSeqXAIJReallocateAIJ(),
   shift the later entries up by one and insert the new entry.

   The arguments are expanded more than once, so they must be free of side effects.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
  { \
    if ((col) <= lastcol1) low1 = 0;                      \
    else high1 = nrow1;                                   \
    lastcol1 = (col);                                     \
    while (high1-low1 > 5) {                              \
      t = (low1+high1)/2;                                 \
      if (rp1[t] > (col)) high1 = t;                      \
      else                low1  = t;                      \
    }                                                     \
    for (_i=low1; _i<high1; _i++) {                       \
      if (rp1[_i] > (col)) break;                         \
      if (rp1[_i] == (col)) {                             \
        if ((addv) == ADD_VALUES) ap1[_i] += (value);     \
        else                      ap1[_i] = (value);      \
        goto a_noinsert;                                  \
      }                                                   \
    }                                                     \
    if ((value) == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                          \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp1[ii+1] = rp1[ii];                                \
      ap1[ii+1] = ap1[ii];                                \
    }                                                     \
    rp1[_i] = (col);                                      \
    ap1[_i] = (value);                                    \
    A->nonzerostate++;                                    \
    a_noinsert: ;                                         \
    ailen[row] = nrow1;                                   \
  }
409 
410 
/*
   MatSetValues_SeqAIJ_B_Private - Sets or adds one value in row `row`, column `col`
   of the block described by the caller's B-side working variables.

   The macro is not self-contained. It reads and updates these variables of the
   enclosing function: rp2/ap2 (column indices and values of the row), nrow2, rmax2,
   low2, high2, lastcol2, the scratch integers t, _i, ii and N, the settings nonew
   and ignorezeroentries, and B, b, bm, ba, bi, bj, bimax and bilen. It also jumps
   to the label b_noinsert that it defines itself, so it can be expanded only once
   per function.

   Steps: narrow [low2,high2) by bisection, scan the remaining window for `col`,
   and update the value in place if the column is found. Otherwise, subject to
   ignorezeroentries and nonew, grow the row through MatSeqXAIJReallocateAIJ(),
   shift the later entries up by one and insert the new entry.

   The arguments are expanded more than once, so they must be free of side effects.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if ((col) <= lastcol2) low2 = 0;                      \
    else high2 = nrow2;                                   \
    lastcol2 = (col);                                     \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > (col)) high2 = t;                      \
      else                low2  = t;                      \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > (col)) break;                         \
      if (rp2[_i] == (col)) {                             \
        if ((addv) == ADD_VALUES) ap2[_i] += (value);     \
        else                      ap2[_i] = (value);      \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if ((value) == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                          \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = (col);                                      \
    ap2[_i] = (value);                                    \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
448 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
449 {
450   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
451   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
452   PetscErrorCode ierr;
453   PetscInt       l,*garray = mat->garray,diag;
454 
455   PetscFunctionBegin;
456   /* code only works for square matrices A */
457 
458   /* find size of row to the left of the diagonal part */
459   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
460   row  = row - diag;
461   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
462     if (garray[b->j[b->i[row]+l]] > diag) break;
463   }
464   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
465 
466   /* diagonal part */
467   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
468 
469   /* right of diagonal part */
470   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatSetValues_MPIAIJ"
476 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
477 {
478   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
479   PetscScalar    value;
480   PetscErrorCode ierr;
481   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
482   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
483   PetscBool      roworiented = aij->roworiented;
484 
485   /* Some Variables required in the macro */
486   Mat        A                 = aij->A;
487   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
488   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
489   MatScalar  *aa               = a->a;
490   PetscBool  ignorezeroentries = a->ignorezeroentries;
491   Mat        B                 = aij->B;
492   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
493   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
494   MatScalar  *ba               = b->a;
495 
496   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
497   PetscInt  nonew;
498   MatScalar *ap1,*ap2;
499 
500   PetscFunctionBegin;
501   for (i=0; i<m; i++) {
502     if (im[i] < 0) continue;
503 #if defined(PETSC_USE_DEBUG)
504     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
505 #endif
506     if (im[i] >= rstart && im[i] < rend) {
507       row      = im[i] - rstart;
508       lastcol1 = -1;
509       rp1      = aj + ai[row];
510       ap1      = aa + ai[row];
511       rmax1    = aimax[row];
512       nrow1    = ailen[row];
513       low1     = 0;
514       high1    = nrow1;
515       lastcol2 = -1;
516       rp2      = bj + bi[row];
517       ap2      = ba + bi[row];
518       rmax2    = bimax[row];
519       nrow2    = bilen[row];
520       low2     = 0;
521       high2    = nrow2;
522 
523       for (j=0; j<n; j++) {
524         if (roworiented) value = v[i*n+j];
525         else             value = v[i+j*m];
526         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
527         if (in[j] >= cstart && in[j] < cend) {
528           col   = in[j] - cstart;
529           nonew = a->nonew;
530           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
531         } else if (in[j] < 0) continue;
532 #if defined(PETSC_USE_DEBUG)
533         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
534 #endif
535         else {
536           if (mat->was_assembled) {
537             if (!aij->colmap) {
538               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
539             }
540 #if defined(PETSC_USE_CTABLE)
541             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
542             col--;
543 #else
544             col = aij->colmap[in[j]] - 1;
545 #endif
546             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
547               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
548               col  =  in[j];
549               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
550               B     = aij->B;
551               b     = (Mat_SeqAIJ*)B->data;
552               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
553               rp2   = bj + bi[row];
554               ap2   = ba + bi[row];
555               rmax2 = bimax[row];
556               nrow2 = bilen[row];
557               low2  = 0;
558               high2 = nrow2;
559               bm    = aij->B->rmap->n;
560               ba    = b->a;
561             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
562           } else col = in[j];
563           nonew = b->nonew;
564           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
565         }
566       }
567     } else {
568       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
569       if (!aij->donotstash) {
570         mat->assembled = PETSC_FALSE;
571         if (roworiented) {
572           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
573         } else {
574           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
575         }
576       }
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatGetValues_MPIAIJ"
584 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
585 {
586   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
589   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
590 
591   PetscFunctionBegin;
592   for (i=0; i<m; i++) {
593     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
594     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
595     if (idxm[i] >= rstart && idxm[i] < rend) {
596       row = idxm[i] - rstart;
597       for (j=0; j<n; j++) {
598         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
599         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
600         if (idxn[j] >= cstart && idxn[j] < cend) {
601           col  = idxn[j] - cstart;
602           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603         } else {
604           if (!aij->colmap) {
605             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
606           }
607 #if defined(PETSC_USE_CTABLE)
608           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
609           col--;
610 #else
611           col = aij->colmap[idxn[j]] - 1;
612 #endif
613           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
614           else {
615             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
616           }
617         }
618       }
619     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
628 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
629 {
630   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
631   PetscErrorCode ierr;
632   PetscInt       nstash,reallocs;
633   InsertMode     addv;
634 
635   PetscFunctionBegin;
636   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
637 
638   /* make sure all processors are either in INSERTMODE or ADDMODE */
639   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
640   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
641   mat->insertmode = addv; /* in case this processor had no cache */
642 
643   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
651 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
655   PetscErrorCode ierr;
656   PetscMPIInt    n;
657   PetscInt       i,j,rstart,ncols,flg;
658   PetscInt       *row,*col;
659   PetscBool      other_disassembled;
660   PetscScalar    *val;
661   InsertMode     addv = mat->insertmode;
662 
663   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
664 
665   PetscFunctionBegin;
666   if (!aij->donotstash && !mat->nooffprocentries) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n; ) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) {
674           if (row[j] != rstart) break;
675         }
676         if (j < n) ncols = j-i;
677         else       ncols = n-i;
678         /* Now assemble all these values with a single function call */
679         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
680 
681         i = j;
682       }
683     }
684     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
685   }
686   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
687   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
688 
689   /* determine if any processor has disassembled, if so we must
690      also disassemble ourselfs, in order that we may reassemble. */
691   /*
692      if nonzero structure of submatrix B cannot change then we know that
693      no processor disassembled thus we can skip this stuff
694   */
695   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
696     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
697     if (mat->was_assembled && !other_disassembled) {
698       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
699     }
700   }
701   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
702     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
703   }
704   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
705   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
706   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
707 
708   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
709 
710   aij->rowvalues = 0;
711 
712   /* used by MatAXPY() */
713   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
714   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
715 
716   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
717   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
718 
719   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
720   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
721     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
722     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
729 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
730 {
731   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
732   PetscErrorCode ierr;
733 
734   PetscFunctionBegin;
735   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
736   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroRows_MPIAIJ"
742 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
743 {
744   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
745   PetscInt      *owners = A->rmap->range;
746   PetscInt       n      = A->rmap->n;
747   PetscSF        sf;
748   PetscInt      *lrows;
749   PetscSFNode   *rrows;
750   PetscInt       r, p = 0, len = 0;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754   /* Create SF where leaves are input rows and roots are owned rows */
755   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
756   for (r = 0; r < n; ++r) lrows[r] = -1;
757   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
758   for (r = 0; r < N; ++r) {
759     const PetscInt idx   = rows[r];
760     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
761     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
762       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
763     }
764     if (A->nooffproczerorows) {
765       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
766       lrows[len++] = idx - owners[p];
767     } else {
768       rrows[r].rank = p;
769       rrows[r].index = rows[r] - owners[p];
770     }
771   }
772   if (!A->nooffproczerorows) {
773     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
774     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
775     /* Collect flags for rows to be zeroed */
776     ierr = PetscSFReduceBegin(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr);
777     ierr = PetscSFReduceEnd(sf, MPIU_INT, rows, lrows, MPI_LOR);CHKERRQ(ierr);
778     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
779     /* Compress and put in row numbers */
780     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
781   }
782   /* fix right hand side if needed */
783   if (x && b) {
784     const PetscScalar *xx;
785     PetscScalar       *bb;
786 
787     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
788     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
789     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
790     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
791     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
792   }
793   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
794   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
796     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
797   } else if (diag != 0.0) {
798     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
799     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
800     for (r = 0; r < len; ++r) {
801       const PetscInt row = lrows[r] + A->rmap->rstart;
802       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
803     }
804     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
805     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
806   } else {
807     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   }
809   ierr = PetscFree(lrows);CHKERRQ(ierr);
810 
811   /* only change matrix nonzero state if pattern was allowed to be changed */
812   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
813     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
814     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
815   }
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
821 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
822 {
823   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
824   PetscErrorCode    ierr;
825   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
826   PetscInt          i,j,r,m,p = 0,len = 0;
827   PetscInt          *lrows,*owners = A->rmap->range;
828   PetscSFNode       *rrows;
829   PetscSF           sf;
830   const PetscScalar *xx;
831   PetscScalar       *bb,*mask;
832   Vec               xmask,lmask;
833   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
834   const PetscInt    *aj, *ii,*ridx;
835   PetscScalar       *aa;
836 #if defined(PETSC_DEBUG)
837   PetscBool found = PETSC_FALSE;
838 #endif
839 
840   PetscFunctionBegin;
841   /* Create SF where leaves are input rows and roots are owned rows */
842   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
843   for (r = 0; r < n; ++r) lrows[r] = -1;
844   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
845   for (r = 0; r < N; ++r) {
846     const PetscInt idx   = rows[r];
847     PetscBool      found = PETSC_FALSE;
848     /* Trick for efficient searching for sorted rows */
849     if (lastidx > idx) p = 0;
850     lastidx = idx;
851     for (; p < size; ++p) {
852       if (idx >= owners[p] && idx < owners[p+1]) {
853         rrows[r].rank  = p;
854         rrows[r].index = rows[r] - owners[p];
855         found = PETSC_TRUE;
856         break;
857       }
858     }
859     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
860   }
861   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
862   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
863   /* Collect flags for rows to be zeroed */
864   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
865   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
866   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
867   /* Compress and put in row numbers */
868   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
869   /* zero diagonal part of matrix */
870   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
871   /* handle off diagonal part of matrix */
872   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
873   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
874   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
875   for (i=0; i<len; i++) bb[lrows[i]] = 1;
876   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
877   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
880   if (x) {
881     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
884     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
885   }
886   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
887   /* remove zeroed rows of off diagonal matrix */
888   ii = aij->i;
889   for (i=0; i<len; i++) {
890     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
891   }
892   /* loop over all elements of off process part of matrix zeroing removed columns*/
893   if (aij->compressedrow.use) {
894     m    = aij->compressedrow.nrows;
895     ii   = aij->compressedrow.i;
896     ridx = aij->compressedrow.rindex;
897     for (i=0; i<m; i++) {
898       n  = ii[i+1] - ii[i];
899       aj = aij->j + ii[i];
900       aa = aij->a + ii[i];
901 
902       for (j=0; j<n; j++) {
903         if (PetscAbsScalar(mask[*aj])) {
904           if (b) bb[*ridx] -= *aa*xx[*aj];
905           *aa = 0.0;
906         }
907         aa++;
908         aj++;
909       }
910       ridx++;
911     }
912   } else { /* do not use compressed row format */
913     m = l->B->rmap->n;
914     for (i=0; i<m; i++) {
915       n  = ii[i+1] - ii[i];
916       aj = aij->j + ii[i];
917       aa = aij->a + ii[i];
918       for (j=0; j<n; j++) {
919         if (PetscAbsScalar(mask[*aj])) {
920           if (b) bb[i] -= *aa*xx[*aj];
921           *aa = 0.0;
922         }
923         aa++;
924         aj++;
925       }
926     }
927   }
928   if (x) {
929     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
930     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
931   }
932   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
933   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
934   ierr = PetscFree(lrows);CHKERRQ(ierr);
935 
936   /* only change matrix nonzero state if pattern was allowed to be changed */
937   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
938     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
939     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
940   }
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatMult_MPIAIJ"
946 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
947 {
948   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
949   PetscErrorCode ierr;
950   PetscInt       nt;
951 
952   PetscFunctionBegin;
953   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
954   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
955   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
956   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
958   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
964 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
965 {
966   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatMultAdd_MPIAIJ"
976 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
984   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
985   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
991 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
992 {
993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
994   PetscErrorCode ierr;
995   PetscBool      merged;
996 
997   PetscFunctionBegin;
998   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
999   /* do nondiagonal part */
1000   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1001   if (!merged) {
1002     /* send it on its way */
1003     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1004     /* do local part */
1005     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1006     /* receive remote parts: note this assumes the values are not actually */
1007     /* added in yy until the next line, */
1008     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   } else {
1010     /* do local part */
1011     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1012     /* send it on its way */
1013     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1014     /* values actually were received in the Begin() but we need to call this nop */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   }
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1022 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1023 {
1024   MPI_Comm       comm;
1025   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1026   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1027   IS             Me,Notme;
1028   PetscErrorCode ierr;
1029   PetscInt       M,N,first,last,*notme,i;
1030   PetscMPIInt    size;
1031 
1032   PetscFunctionBegin;
1033   /* Easy test: symmetric diagonal block */
1034   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1035   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1036   if (!*f) PetscFunctionReturn(0);
1037   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1038   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1039   if (size == 1) PetscFunctionReturn(0);
1040 
1041   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1042   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1043   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1044   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1045   for (i=0; i<first; i++) notme[i] = i;
1046   for (i=last; i<M; i++) notme[i-last+first] = i;
1047   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1048   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1049   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1050   Aoff = Aoffs[0];
1051   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1052   Boff = Boffs[0];
1053   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1054   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1055   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1056   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1057   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1058   ierr = PetscFree(notme);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 #undef __FUNCT__
1063 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* do nondiagonal part */
1071   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1072   /* send it on its way */
1073   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1074   /* do local part */
1075   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1076   /* receive remote parts */
1077   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 /*
1082   This only works correctly for square matrices where the subblock A->A is the
1083    diagonal block
1084 */
1085 #undef __FUNCT__
1086 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1087 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1088 {
1089   PetscErrorCode ierr;
1090   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1091 
1092   PetscFunctionBegin;
1093   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1094   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1095   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 #undef __FUNCT__
1100 #define __FUNCT__ "MatScale_MPIAIJ"
1101 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1102 {
1103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1104   PetscErrorCode ierr;
1105 
1106   PetscFunctionBegin;
1107   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1108   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatDestroy_Redundant"
1114 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1115 {
1116   PetscErrorCode ierr;
1117   Mat_Redundant  *redund = *redundant;
1118   PetscInt       i;
1119 
1120   PetscFunctionBegin;
1121   *redundant = NULL;
1122   if (redund){
1123     if (redund->matseq) { /* via MatGetSubMatrices()  */
1124       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1125       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1126       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1127       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1128     } else {
1129       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1130       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1131       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1132       for (i=0; i<redund->nrecvs; i++) {
1133         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1134         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1135       }
1136       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1137     }
1138 
1139     if (redund->psubcomm) {
1140       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1141     }
1142     ierr = PetscFree(redund);CHKERRQ(ierr);
1143   }
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatDestroy_MPIAIJ"
1149 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1150 {
1151   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1152   PetscErrorCode ierr;
1153 
1154   PetscFunctionBegin;
1155 #if defined(PETSC_USE_LOG)
1156   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1157 #endif
1158   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1159   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1160   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1161   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1162   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1163 #if defined(PETSC_USE_CTABLE)
1164   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1165 #else
1166   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1167 #endif
1168   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1169   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1170   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1171   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1172   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1173   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1174 
1175   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1176   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1177   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1178   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1189 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1190 {
1191   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1192   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1193   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1194   PetscErrorCode ierr;
1195   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1196   int            fd;
1197   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1198   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1199   PetscScalar    *column_values;
1200   PetscInt       message_count,flowcontrolcount;
1201   FILE           *file;
1202 
1203   PetscFunctionBegin;
1204   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1205   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1206   nz   = A->nz + B->nz;
1207   if (!rank) {
1208     header[0] = MAT_FILE_CLASSID;
1209     header[1] = mat->rmap->N;
1210     header[2] = mat->cmap->N;
1211 
1212     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1213     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1214     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1215     /* get largest number of rows any processor has */
1216     rlen  = mat->rmap->n;
1217     range = mat->rmap->range;
1218     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1219   } else {
1220     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1221     rlen = mat->rmap->n;
1222   }
1223 
1224   /* load up the local row counts */
1225   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1226   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1227 
1228   /* store the row lengths to the file */
1229   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1230   if (!rank) {
1231     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1232     for (i=1; i<size; i++) {
1233       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1234       rlen = range[i+1] - range[i];
1235       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1236       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1237     }
1238     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1239   } else {
1240     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1241     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1243   }
1244   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1245 
1246   /* load up the local column indices */
1247   nzmax = nz; /* th processor needs space a largest processor needs */
1248   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1249   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1250   cnt   = 0;
1251   for (i=0; i<mat->rmap->n; i++) {
1252     for (j=B->i[i]; j<B->i[i+1]; j++) {
1253       if ((col = garray[B->j[j]]) > cstart) break;
1254       column_indices[cnt++] = col;
1255     }
1256     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1257     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1258   }
1259   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1260 
1261   /* store the column indices to the file */
1262   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1263   if (!rank) {
1264     MPI_Status status;
1265     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1266     for (i=1; i<size; i++) {
1267       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1268       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1269       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1270       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1271       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1272     }
1273     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1274   } else {
1275     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1276     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1277     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1279   }
1280   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1281 
1282   /* load up the local column values */
1283   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1284   cnt  = 0;
1285   for (i=0; i<mat->rmap->n; i++) {
1286     for (j=B->i[i]; j<B->i[i+1]; j++) {
1287       if (garray[B->j[j]] > cstart) break;
1288       column_values[cnt++] = B->a[j];
1289     }
1290     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1291     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1292   }
1293   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1294 
1295   /* store the column values to the file */
1296   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1297   if (!rank) {
1298     MPI_Status status;
1299     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1300     for (i=1; i<size; i++) {
1301       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1302       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1303       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1304       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1305       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1306     }
1307     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1308   } else {
1309     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1310     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1311     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1313   }
1314   ierr = PetscFree(column_values);CHKERRQ(ierr);
1315 
1316   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1317   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #include <petscdraw.h>
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1324 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1325 {
1326   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1327   PetscErrorCode    ierr;
1328   PetscMPIInt       rank = aij->rank,size = aij->size;
1329   PetscBool         isdraw,iascii,isbinary;
1330   PetscViewer       sviewer;
1331   PetscViewerFormat format;
1332 
1333   PetscFunctionBegin;
1334   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1335   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1336   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1337   if (iascii) {
1338     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1339     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1340       MatInfo   info;
1341       PetscBool inodes;
1342 
1343       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1344       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1345       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1346       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1347       if (!inodes) {
1348         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1349                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1350       } else {
1351         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1352                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1353       }
1354       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1355       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1356       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1357       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1358       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1359       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1360       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1361       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1362       PetscFunctionReturn(0);
1363     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1364       PetscInt inodecount,inodelimit,*inodes;
1365       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1366       if (inodes) {
1367         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1368       } else {
1369         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1370       }
1371       PetscFunctionReturn(0);
1372     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1373       PetscFunctionReturn(0);
1374     }
1375   } else if (isbinary) {
1376     if (size == 1) {
1377       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1378       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1379     } else {
1380       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1381     }
1382     PetscFunctionReturn(0);
1383   } else if (isdraw) {
1384     PetscDraw draw;
1385     PetscBool isnull;
1386     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1387     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1388   }
1389 
1390   {
1391     /* assemble the entire matrix onto first processor. */
1392     Mat        A;
1393     Mat_SeqAIJ *Aloc;
1394     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1395     MatScalar  *a;
1396 
1397     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1398     if (!rank) {
1399       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1400     } else {
1401       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1402     }
1403     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1404     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1405     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1406     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1407     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1408 
1409     /* copy over the A part */
1410     Aloc = (Mat_SeqAIJ*)aij->A->data;
1411     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1412     row  = mat->rmap->rstart;
1413     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1414     for (i=0; i<m; i++) {
1415       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1416       row++;
1417       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1418     }
1419     aj = Aloc->j;
1420     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1421 
1422     /* copy over the B part */
1423     Aloc = (Mat_SeqAIJ*)aij->B->data;
1424     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1425     row  = mat->rmap->rstart;
1426     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1427     ct   = cols;
1428     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1429     for (i=0; i<m; i++) {
1430       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1431       row++;
1432       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1433     }
1434     ierr = PetscFree(ct);CHKERRQ(ierr);
1435     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1436     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1437     /*
1438        Everyone has to call to draw the matrix since the graphics waits are
1439        synchronized across all processors that share the PetscDraw object
1440     */
1441     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1442     if (!rank) {
1443       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1444     }
1445     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1446     ierr = MatDestroy(&A);CHKERRQ(ierr);
1447   }
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatView_MPIAIJ"
1453 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1454 {
1455   PetscErrorCode ierr;
1456   PetscBool      iascii,isdraw,issocket,isbinary;
1457 
1458   PetscFunctionBegin;
1459   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1460   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1461   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1462   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1463   if (iascii || isdraw || isbinary || issocket) {
1464     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatSOR_MPIAIJ"
1471 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1472 {
1473   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1474   PetscErrorCode ierr;
1475   Vec            bb1 = 0;
1476   PetscBool      hasop;
1477 
1478   PetscFunctionBegin;
1479   if (flag == SOR_APPLY_UPPER) {
1480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1481     PetscFunctionReturn(0);
1482   }
1483 
1484   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1485     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1486   }
1487 
1488   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1489     if (flag & SOR_ZERO_INITIAL_GUESS) {
1490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1491       its--;
1492     }
1493 
1494     while (its--) {
1495       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1496       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1497 
1498       /* update rhs: bb1 = bb - B*x */
1499       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1500       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1501 
1502       /* local sweep */
1503       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1504     }
1505   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1506     if (flag & SOR_ZERO_INITIAL_GUESS) {
1507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1508       its--;
1509     }
1510     while (its--) {
1511       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1512       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1513 
1514       /* update rhs: bb1 = bb - B*x */
1515       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1516       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1517 
1518       /* local sweep */
1519       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1520     }
1521   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1522     if (flag & SOR_ZERO_INITIAL_GUESS) {
1523       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1524       its--;
1525     }
1526     while (its--) {
1527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529 
1530       /* update rhs: bb1 = bb - B*x */
1531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1533 
1534       /* local sweep */
1535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1536     }
1537   } else if (flag & SOR_EISENSTAT) {
1538     Vec xx1;
1539 
1540     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1541     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1542 
1543     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1544     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1545     if (!mat->diag) {
1546       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1547       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1548     }
1549     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1550     if (hasop) {
1551       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1552     } else {
1553       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1554     }
1555     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1556 
1557     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1558 
1559     /* local sweep */
1560     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1561     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1562     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1563   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1564 
1565   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1566   PetscFunctionReturn(0);
1567 }
1568 
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatPermute_MPIAIJ"
1571 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1572 {
1573   Mat            aA,aB,Aperm;
1574   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1575   PetscScalar    *aa,*ba;
1576   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1577   PetscSF        rowsf,sf;
1578   IS             parcolp = NULL;
1579   PetscBool      done;
1580   PetscErrorCode ierr;
1581 
1582   PetscFunctionBegin;
1583   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1584   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1585   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1586   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1587 
1588   /* Invert row permutation to find out where my rows should go */
1589   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1590   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1591   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1592   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1593   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1594   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1595 
1596   /* Invert column permutation to find out where my columns should go */
1597   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1598   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1599   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1600   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1601   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1602   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1603   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1604 
1605   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1606   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1607   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1608 
1609   /* Find out where my gcols should go */
1610   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1611   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1612   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1613   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1614   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1615   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1616   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1617   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1618 
1619   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1620   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1621   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1622   for (i=0; i<m; i++) {
1623     PetscInt row = rdest[i],rowner;
1624     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1625     for (j=ai[i]; j<ai[i+1]; j++) {
1626       PetscInt cowner,col = cdest[aj[j]];
1627       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1628       if (rowner == cowner) dnnz[i]++;
1629       else onnz[i]++;
1630     }
1631     for (j=bi[i]; j<bi[i+1]; j++) {
1632       PetscInt cowner,col = gcdest[bj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637   }
1638   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1639   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1640   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1641   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1642   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1643 
1644   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1645   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1646   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1647   for (i=0; i<m; i++) {
1648     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1649     PetscInt j0,rowlen;
1650     rowlen = ai[i+1] - ai[i];
1651     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1652       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1653       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1654     }
1655     rowlen = bi[i+1] - bi[i];
1656     for (j0=j=0; j<rowlen; j0=j) {
1657       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1658       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1659     }
1660   }
1661   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1662   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1663   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1664   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1665   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1666   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1667   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1668   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1669   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1670   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1671   *B = Aperm;
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1677 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1678 {
1679   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1680   Mat            A    = mat->A,B = mat->B;
1681   PetscErrorCode ierr;
1682   PetscReal      isend[5],irecv[5];
1683 
1684   PetscFunctionBegin;
1685   info->block_size = 1.0;
1686   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1687 
1688   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1689   isend[3] = info->memory;  isend[4] = info->mallocs;
1690 
1691   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1692 
1693   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1694   isend[3] += info->memory;  isend[4] += info->mallocs;
1695   if (flag == MAT_LOCAL) {
1696     info->nz_used      = isend[0];
1697     info->nz_allocated = isend[1];
1698     info->nz_unneeded  = isend[2];
1699     info->memory       = isend[3];
1700     info->mallocs      = isend[4];
1701   } else if (flag == MAT_GLOBAL_MAX) {
1702     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1703 
1704     info->nz_used      = irecv[0];
1705     info->nz_allocated = irecv[1];
1706     info->nz_unneeded  = irecv[2];
1707     info->memory       = irecv[3];
1708     info->mallocs      = irecv[4];
1709   } else if (flag == MAT_GLOBAL_SUM) {
1710     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1711 
1712     info->nz_used      = irecv[0];
1713     info->nz_allocated = irecv[1];
1714     info->nz_unneeded  = irecv[2];
1715     info->memory       = irecv[3];
1716     info->mallocs      = irecv[4];
1717   }
1718   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1719   info->fill_ratio_needed = 0;
1720   info->factor_mallocs    = 0;
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatSetOption_MPIAIJ"
1726 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1727 {
1728   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1729   PetscErrorCode ierr;
1730 
1731   PetscFunctionBegin;
1732   switch (op) {
1733   case MAT_NEW_NONZERO_LOCATIONS:
1734   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1735   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1736   case MAT_KEEP_NONZERO_PATTERN:
1737   case MAT_NEW_NONZERO_LOCATION_ERR:
1738   case MAT_USE_INODES:
1739   case MAT_IGNORE_ZERO_ENTRIES:
1740     MatCheckPreallocated(A,1);
1741     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1742     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1743     break;
1744   case MAT_ROW_ORIENTED:
1745     a->roworiented = flg;
1746 
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_NEW_DIAGONALS:
1751     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1752     break;
1753   case MAT_IGNORE_OFF_PROC_ENTRIES:
1754     a->donotstash = flg;
1755     break;
1756   case MAT_SPD:
1757     A->spd_set = PETSC_TRUE;
1758     A->spd     = flg;
1759     if (flg) {
1760       A->symmetric                  = PETSC_TRUE;
1761       A->structurally_symmetric     = PETSC_TRUE;
1762       A->symmetric_set              = PETSC_TRUE;
1763       A->structurally_symmetric_set = PETSC_TRUE;
1764     }
1765     break;
1766   case MAT_SYMMETRIC:
1767     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1768     break;
1769   case MAT_STRUCTURALLY_SYMMETRIC:
1770     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1771     break;
1772   case MAT_HERMITIAN:
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   case MAT_SYMMETRY_ETERNAL:
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     break;
1778   default:
1779     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1780   }
1781   PetscFunctionReturn(0);
1782 }
1783 
1784 #undef __FUNCT__
1785 #define __FUNCT__ "MatGetRow_MPIAIJ"
1786 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1787 {
1788   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1789   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1790   PetscErrorCode ierr;
1791   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1792   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1793   PetscInt       *cmap,*idx_p;
1794 
1795   PetscFunctionBegin;
1796   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1797   mat->getrowactive = PETSC_TRUE;
1798 
1799   if (!mat->rowvalues && (idx || v)) {
1800     /*
1801         allocate enough space to hold information from the longest row.
1802     */
1803     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1804     PetscInt   max = 1,tmp;
1805     for (i=0; i<matin->rmap->n; i++) {
1806       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1807       if (max < tmp) max = tmp;
1808     }
1809     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1810   }
1811 
1812   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1813   lrow = row - rstart;
1814 
1815   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1816   if (!v)   {pvA = 0; pvB = 0;}
1817   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1818   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1819   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1820   nztot = nzA + nzB;
1821 
1822   cmap = mat->garray;
1823   if (v  || idx) {
1824     if (nztot) {
1825       /* Sort by increasing column numbers, assuming A and B already sorted */
1826       PetscInt imark = -1;
1827       if (v) {
1828         *v = v_p = mat->rowvalues;
1829         for (i=0; i<nzB; i++) {
1830           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1831           else break;
1832         }
1833         imark = i;
1834         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1835         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1836       }
1837       if (idx) {
1838         *idx = idx_p = mat->rowindices;
1839         if (imark > -1) {
1840           for (i=0; i<imark; i++) {
1841             idx_p[i] = cmap[cworkB[i]];
1842           }
1843         } else {
1844           for (i=0; i<nzB; i++) {
1845             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1846             else break;
1847           }
1848           imark = i;
1849         }
1850         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1851         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1852       }
1853     } else {
1854       if (idx) *idx = 0;
1855       if (v)   *v   = 0;
1856     }
1857   }
1858   *nz  = nztot;
1859   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1860   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1866 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1867 {
1868   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1869 
1870   PetscFunctionBegin;
1871   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1872   aij->getrowactive = PETSC_FALSE;
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatNorm_MPIAIJ"
1878 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1879 {
1880   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1881   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1882   PetscErrorCode ierr;
1883   PetscInt       i,j,cstart = mat->cmap->rstart;
1884   PetscReal      sum = 0.0;
1885   MatScalar      *v;
1886 
1887   PetscFunctionBegin;
1888   if (aij->size == 1) {
1889     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1890   } else {
1891     if (type == NORM_FROBENIUS) {
1892       v = amat->a;
1893       for (i=0; i<amat->nz; i++) {
1894         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1895       }
1896       v = bmat->a;
1897       for (i=0; i<bmat->nz; i++) {
1898         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1899       }
1900       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1901       *norm = PetscSqrtReal(*norm);
1902     } else if (type == NORM_1) { /* max column norm */
1903       PetscReal *tmp,*tmp2;
1904       PetscInt  *jj,*garray = aij->garray;
1905       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1906       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1907       *norm = 0.0;
1908       v     = amat->a; jj = amat->j;
1909       for (j=0; j<amat->nz; j++) {
1910         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1911       }
1912       v = bmat->a; jj = bmat->j;
1913       for (j=0; j<bmat->nz; j++) {
1914         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1915       }
1916       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1917       for (j=0; j<mat->cmap->N; j++) {
1918         if (tmp2[j] > *norm) *norm = tmp2[j];
1919       }
1920       ierr = PetscFree(tmp);CHKERRQ(ierr);
1921       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1922     } else if (type == NORM_INFINITY) { /* max row norm */
1923       PetscReal ntemp = 0.0;
1924       for (j=0; j<aij->A->rmap->n; j++) {
1925         v   = amat->a + amat->i[j];
1926         sum = 0.0;
1927         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1928           sum += PetscAbsScalar(*v); v++;
1929         }
1930         v = bmat->a + bmat->i[j];
1931         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1932           sum += PetscAbsScalar(*v); v++;
1933         }
1934         if (sum > ntemp) ntemp = sum;
1935       }
1936       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1937     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatTranspose_MPIAIJ"
1944 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1945 {
1946   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1947   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1948   PetscErrorCode ierr;
1949   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1950   PetscInt       cstart = A->cmap->rstart,ncol;
1951   Mat            B;
1952   MatScalar      *array;
1953 
1954   PetscFunctionBegin;
1955   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1956 
1957   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1958   ai = Aloc->i; aj = Aloc->j;
1959   bi = Bloc->i; bj = Bloc->j;
1960   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1961     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1962     PetscSFNode          *oloc;
1963     PETSC_UNUSED PetscSF sf;
1964 
1965     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1966     /* compute d_nnz for preallocation */
1967     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1968     for (i=0; i<ai[ma]; i++) {
1969       d_nnz[aj[i]]++;
1970       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1971     }
1972     /* compute local off-diagonal contributions */
1973     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1975     /* map those to global */
1976     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1977     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1978     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1979     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1980     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1981     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1982     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1983 
1984     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1985     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1986     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1987     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1988     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1989     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1990   } else {
1991     B    = *matout;
1992     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1993     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1994   }
1995 
1996   /* copy over the A part */
1997   array = Aloc->a;
1998   row   = A->rmap->rstart;
1999   for (i=0; i<ma; i++) {
2000     ncol = ai[i+1]-ai[i];
2001     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2002     row++;
2003     array += ncol; aj += ncol;
2004   }
2005   aj = Aloc->j;
2006   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2007 
2008   /* copy over the B part */
2009   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2010   array = Bloc->a;
2011   row   = A->rmap->rstart;
2012   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2013   cols_tmp = cols;
2014   for (i=0; i<mb; i++) {
2015     ncol = bi[i+1]-bi[i];
2016     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2017     row++;
2018     array += ncol; cols_tmp += ncol;
2019   }
2020   ierr = PetscFree(cols);CHKERRQ(ierr);
2021 
2022   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2023   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2024   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2025     *matout = B;
2026   } else {
2027     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2028   }
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2034 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2035 {
2036   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2037   Mat            a    = aij->A,b = aij->B;
2038   PetscErrorCode ierr;
2039   PetscInt       s1,s2,s3;
2040 
2041   PetscFunctionBegin;
2042   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2043   if (rr) {
2044     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2045     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2046     /* Overlap communication with computation. */
2047     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2048   }
2049   if (ll) {
2050     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2051     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2052     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2053   }
2054   /* scale  the diagonal block */
2055   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2056 
2057   if (rr) {
2058     /* Do a scatter end and then right scale the off-diagonal block */
2059     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2060     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2061   }
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2067 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2068 {
2069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2074   PetscFunctionReturn(0);
2075 }
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatEqual_MPIAIJ"
2079 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2080 {
2081   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2082   Mat            a,b,c,d;
2083   PetscBool      flg;
2084   PetscErrorCode ierr;
2085 
2086   PetscFunctionBegin;
2087   a = matA->A; b = matA->B;
2088   c = matB->A; d = matB->B;
2089 
2090   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2091   if (flg) {
2092     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2093   }
2094   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2095   PetscFunctionReturn(0);
2096 }
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatCopy_MPIAIJ"
2100 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2101 {
2102   PetscErrorCode ierr;
2103   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2104   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2105 
2106   PetscFunctionBegin;
2107   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2108   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2109     /* because of the column compression in the off-processor part of the matrix a->B,
2110        the number of columns in a->B and b->B may be different, hence we cannot call
2111        the MatCopy() directly on the two parts. If need be, we can provide a more
2112        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2113        then copying the submatrices */
2114     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2115   } else {
2116     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2117     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatSetUp_MPIAIJ"
2124 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2125 {
2126   PetscErrorCode ierr;
2127 
2128   PetscFunctionBegin;
2129   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 
2133 /*
2134    Computes the number of nonzeros per row needed for preallocation when X and Y
2135    have different nonzero structure.
2136 */
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2139 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2140 {
2141   PetscInt       i,j,k,nzx,nzy;
2142 
2143   PetscFunctionBegin;
2144   /* Set the number of nonzeros in the new matrix */
2145   for (i=0; i<m; i++) {
2146     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2147     nzx = xi[i+1] - xi[i];
2148     nzy = yi[i+1] - yi[i];
2149     nnz[i] = 0;
2150     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2151       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2152       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2153       nnz[i]++;
2154     }
2155     for (; k<nzy; k++) nnz[i]++;
2156   }
2157   PetscFunctionReturn(0);
2158 }
2159 
2160 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2163 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2164 {
2165   PetscErrorCode ierr;
2166   PetscInt       m = Y->rmap->N;
2167   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2168   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2169 
2170   PetscFunctionBegin;
2171   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatAXPY_MPIAIJ"
2177 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2178 {
2179   PetscErrorCode ierr;
2180   PetscInt       i;
2181   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2182   PetscBLASInt   bnz,one=1;
2183   Mat_SeqAIJ     *x,*y;
2184 
2185   PetscFunctionBegin;
2186   if (str == SAME_NONZERO_PATTERN) {
2187     PetscScalar alpha = a;
2188     x    = (Mat_SeqAIJ*)xx->A->data;
2189     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2190     y    = (Mat_SeqAIJ*)yy->A->data;
2191     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2192     x    = (Mat_SeqAIJ*)xx->B->data;
2193     y    = (Mat_SeqAIJ*)yy->B->data;
2194     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2195     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2196     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2197   } else if (str == SUBSET_NONZERO_PATTERN) {
2198     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2199 
2200     x = (Mat_SeqAIJ*)xx->B->data;
2201     y = (Mat_SeqAIJ*)yy->B->data;
2202     if (y->xtoy && y->XtoY != xx->B) {
2203       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2204       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2205     }
2206     if (!y->xtoy) { /* get xtoy */
2207       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2208       y->XtoY = xx->B;
2209       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2210     }
2211     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2212     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2213   } else {
2214     Mat      B;
2215     PetscInt *nnz_d,*nnz_o;
2216     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2217     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2218     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2219     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2220     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2221     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2222     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2223     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2224     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2225     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2226     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2227     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2228     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2229     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2230   }
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatConjugate_MPIAIJ"
2238 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2239 {
2240 #if defined(PETSC_USE_COMPLEX)
2241   PetscErrorCode ierr;
2242   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2243 
2244   PetscFunctionBegin;
2245   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2246   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2247 #else
2248   PetscFunctionBegin;
2249 #endif
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatRealPart_MPIAIJ"
2255 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2256 {
2257   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2258   PetscErrorCode ierr;
2259 
2260   PetscFunctionBegin;
2261   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2262   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2268 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2269 {
2270   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2271   PetscErrorCode ierr;
2272 
2273   PetscFunctionBegin;
2274   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2275   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2276   PetscFunctionReturn(0);
2277 }
2278 
2279 #if defined(PETSC_HAVE_PBGL)
2280 
2281 #include <boost/parallel/mpi/bsp_process_group.hpp>
2282 #include <boost/graph/distributed/ilu_default_graph.hpp>
2283 #include <boost/graph/distributed/ilu_0_block.hpp>
2284 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2285 #include <boost/graph/distributed/petsc/interface.hpp>
2286 #include <boost/multi_array.hpp>
2287 #include <boost/parallel/distributed_property_map->hpp>
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2291 /*
2292   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2293 */
2294 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2295 {
2296   namespace petsc = boost::distributed::petsc;
2297 
2298   namespace graph_dist = boost::graph::distributed;
2299   using boost::graph::distributed::ilu_default::process_group_type;
2300   using boost::graph::ilu_permuted;
2301 
2302   PetscBool      row_identity, col_identity;
2303   PetscContainer c;
2304   PetscInt       m, n, M, N;
2305   PetscErrorCode ierr;
2306 
2307   PetscFunctionBegin;
2308   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2309   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2310   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2311   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2312 
2313   process_group_type pg;
2314   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2315   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2316   lgraph_type& level_graph = *lgraph_p;
2317   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2318 
2319   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2320   ilu_permuted(level_graph);
2321 
2322   /* put together the new matrix */
2323   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2324   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2325   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2326   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2327   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2328   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2329   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2330   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331 
2332   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2333   ierr = PetscContainerSetPointer(c, lgraph_p);
2334   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2335   ierr = PetscContainerDestroy(&c);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2341 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2342 {
2343   PetscFunctionBegin;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatSolve_MPIAIJ"
2349 /*
2350   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2351 */
2352 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2353 {
2354   namespace graph_dist = boost::graph::distributed;
2355 
2356   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2357   lgraph_type    *lgraph_p;
2358   PetscContainer c;
2359   PetscErrorCode ierr;
2360 
2361   PetscFunctionBegin;
2362   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2363   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2364   ierr = VecCopy(b, x);CHKERRQ(ierr);
2365 
2366   PetscScalar *array_x;
2367   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2368   PetscInt sx;
2369   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2370 
2371   PetscScalar *array_b;
2372   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2373   PetscInt sb;
2374   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2375 
2376   lgraph_type& level_graph = *lgraph_p;
2377   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2378 
2379   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2380   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2381   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2382 
2383   typedef boost::iterator_property_map<array_ref_type::iterator,
2384                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2385   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2386   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2387 
2388   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2389   PetscFunctionReturn(0);
2390 }
2391 #endif
2392 
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2396 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2397 {
2398   PetscMPIInt    rank,size;
2399   MPI_Comm       comm;
2400   PetscErrorCode ierr;
2401   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2402   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2403   PetscInt       *rowrange = mat->rmap->range;
2404   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2405   Mat            A = aij->A,B=aij->B,C=*matredundant;
2406   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2407   PetscScalar    *sbuf_a;
2408   PetscInt       nzlocal=a->nz+b->nz;
2409   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2410   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2411   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2412   MatScalar      *aworkA,*aworkB;
2413   PetscScalar    *vals;
2414   PetscMPIInt    tag1,tag2,tag3,imdex;
2415   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2416   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2417   MPI_Status     recv_status,*send_status;
2418   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2419   PetscInt       **rbuf_j=NULL;
2420   PetscScalar    **rbuf_a=NULL;
2421   Mat_Redundant  *redund =NULL;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2426   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2427   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2428   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2429 
2430   if (reuse == MAT_REUSE_MATRIX) {
2431     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2432     if (subsize == 1) {
2433       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2434       redund = c->redundant;
2435     } else {
2436       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2437       redund = c->redundant;
2438     }
2439     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2440 
2441     nsends    = redund->nsends;
2442     nrecvs    = redund->nrecvs;
2443     send_rank = redund->send_rank;
2444     recv_rank = redund->recv_rank;
2445     sbuf_nz   = redund->sbuf_nz;
2446     rbuf_nz   = redund->rbuf_nz;
2447     sbuf_j    = redund->sbuf_j;
2448     sbuf_a    = redund->sbuf_a;
2449     rbuf_j    = redund->rbuf_j;
2450     rbuf_a    = redund->rbuf_a;
2451   }
2452 
2453   if (reuse == MAT_INITIAL_MATRIX) {
2454     PetscInt    nleftover,np_subcomm;
2455 
2456     /* get the destination processors' id send_rank, nsends and nrecvs */
2457     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2458 
2459     np_subcomm = size/nsubcomm;
2460     nleftover  = size - nsubcomm*np_subcomm;
2461 
2462     /* block of codes below is specific for INTERLACED */
2463     /* ------------------------------------------------*/
2464     nsends = 0; nrecvs = 0;
2465     for (i=0; i<size; i++) {
2466       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2467         send_rank[nsends++] = i;
2468         recv_rank[nrecvs++] = i;
2469       }
2470     }
2471     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2472       i = size-nleftover-1;
2473       j = 0;
2474       while (j < nsubcomm - nleftover) {
2475         send_rank[nsends++] = i;
2476         i--; j++;
2477       }
2478     }
2479 
2480     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2481       for (i=0; i<nleftover; i++) {
2482         recv_rank[nrecvs++] = size-nleftover+i;
2483       }
2484     }
2485     /*----------------------------------------------*/
2486 
2487     /* allocate sbuf_j, sbuf_a */
2488     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2489     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2490     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2491     /*
2492     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2493     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2494      */
2495   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2496 
2497   /* copy mat's local entries into the buffers */
2498   if (reuse == MAT_INITIAL_MATRIX) {
2499     rownz_max = 0;
2500     rptr      = sbuf_j;
2501     cols      = sbuf_j + rend-rstart + 1;
2502     vals      = sbuf_a;
2503     rptr[0]   = 0;
2504     for (i=0; i<rend-rstart; i++) {
2505       row    = i + rstart;
2506       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2507       ncols  = nzA + nzB;
2508       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2509       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2510       /* load the column indices for this row into cols */
2511       lwrite = 0;
2512       for (l=0; l<nzB; l++) {
2513         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2514           vals[lwrite]   = aworkB[l];
2515           cols[lwrite++] = ctmp;
2516         }
2517       }
2518       for (l=0; l<nzA; l++) {
2519         vals[lwrite]   = aworkA[l];
2520         cols[lwrite++] = cstart + cworkA[l];
2521       }
2522       for (l=0; l<nzB; l++) {
2523         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2524           vals[lwrite]   = aworkB[l];
2525           cols[lwrite++] = ctmp;
2526         }
2527       }
2528       vals     += ncols;
2529       cols     += ncols;
2530       rptr[i+1] = rptr[i] + ncols;
2531       if (rownz_max < ncols) rownz_max = ncols;
2532     }
2533     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2534   } else { /* only copy matrix values into sbuf_a */
2535     rptr    = sbuf_j;
2536     vals    = sbuf_a;
2537     rptr[0] = 0;
2538     for (i=0; i<rend-rstart; i++) {
2539       row    = i + rstart;
2540       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2541       ncols  = nzA + nzB;
2542       cworkB = b->j + b->i[i];
2543       aworkA = a->a + a->i[i];
2544       aworkB = b->a + b->i[i];
2545       lwrite = 0;
2546       for (l=0; l<nzB; l++) {
2547         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2548       }
2549       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2550       for (l=0; l<nzB; l++) {
2551         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2552       }
2553       vals     += ncols;
2554       rptr[i+1] = rptr[i] + ncols;
2555     }
2556   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2557 
2558   /* send nzlocal to others, and recv other's nzlocal */
2559   /*--------------------------------------------------*/
2560   if (reuse == MAT_INITIAL_MATRIX) {
2561     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2562 
2563     s_waits2 = s_waits3 + nsends;
2564     s_waits1 = s_waits2 + nsends;
2565     r_waits1 = s_waits1 + nsends;
2566     r_waits2 = r_waits1 + nrecvs;
2567     r_waits3 = r_waits2 + nrecvs;
2568   } else {
2569     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2570 
2571     r_waits3 = s_waits3 + nsends;
2572   }
2573 
2574   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2575   if (reuse == MAT_INITIAL_MATRIX) {
2576     /* get new tags to keep the communication clean */
2577     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2578     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2579     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2580 
2581     /* post receives of other's nzlocal */
2582     for (i=0; i<nrecvs; i++) {
2583       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2584     }
2585     /* send nzlocal to others */
2586     for (i=0; i<nsends; i++) {
2587       sbuf_nz[i] = nzlocal;
2588       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2589     }
2590     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2591     count = nrecvs;
2592     while (count) {
2593       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2594 
2595       recv_rank[imdex] = recv_status.MPI_SOURCE;
2596       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2597       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2598 
2599       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2600 
2601       rbuf_nz[imdex] += i + 2;
2602 
2603       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2604       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2605       count--;
2606     }
2607     /* wait on sends of nzlocal */
2608     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2609     /* send mat->i,j to others, and recv from other's */
2610     /*------------------------------------------------*/
2611     for (i=0; i<nsends; i++) {
2612       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2613       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2614     }
2615     /* wait on receives of mat->i,j */
2616     /*------------------------------*/
2617     count = nrecvs;
2618     while (count) {
2619       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2620       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2621       count--;
2622     }
2623     /* wait on sends of mat->i,j */
2624     /*---------------------------*/
2625     if (nsends) {
2626       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2627     }
2628   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2629 
2630   /* post receives, send and receive mat->a */
2631   /*----------------------------------------*/
2632   for (imdex=0; imdex<nrecvs; imdex++) {
2633     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2634   }
2635   for (i=0; i<nsends; i++) {
2636     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2637   }
2638   count = nrecvs;
2639   while (count) {
2640     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2641     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2642     count--;
2643   }
2644   if (nsends) {
2645     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2646   }
2647 
2648   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2649 
2650   /* create redundant matrix */
2651   /*-------------------------*/
2652   if (reuse == MAT_INITIAL_MATRIX) {
2653     const PetscInt *range;
2654     PetscInt       rstart_sub,rend_sub,mloc_sub;
2655 
2656     /* compute rownz_max for preallocation */
2657     for (imdex=0; imdex<nrecvs; imdex++) {
2658       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2659       rptr = rbuf_j[imdex];
2660       for (i=0; i<j; i++) {
2661         ncols = rptr[i+1] - rptr[i];
2662         if (rownz_max < ncols) rownz_max = ncols;
2663       }
2664     }
2665 
2666     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2667 
2668     /* get local size of redundant matrix
2669        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2670     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2671     rstart_sub = range[nsubcomm*subrank];
2672     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2673       rend_sub = range[nsubcomm*(subrank+1)];
2674     } else {
2675       rend_sub = mat->rmap->N;
2676     }
2677     mloc_sub = rend_sub - rstart_sub;
2678 
2679     if (M == N) {
2680       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2681     } else { /* non-square matrix */
2682       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2683     }
2684     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2685     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2686     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2687     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2688   } else {
2689     C = *matredundant;
2690   }
2691 
2692   /* insert local matrix entries */
2693   rptr = sbuf_j;
2694   cols = sbuf_j + rend-rstart + 1;
2695   vals = sbuf_a;
2696   for (i=0; i<rend-rstart; i++) {
2697     row   = i + rstart;
2698     ncols = rptr[i+1] - rptr[i];
2699     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2700     vals += ncols;
2701     cols += ncols;
2702   }
2703   /* insert received matrix entries */
2704   for (imdex=0; imdex<nrecvs; imdex++) {
2705     rstart = rowrange[recv_rank[imdex]];
2706     rend   = rowrange[recv_rank[imdex]+1];
2707     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2708     rptr   = rbuf_j[imdex];
2709     cols   = rbuf_j[imdex] + rend-rstart + 1;
2710     vals   = rbuf_a[imdex];
2711     for (i=0; i<rend-rstart; i++) {
2712       row   = i + rstart;
2713       ncols = rptr[i+1] - rptr[i];
2714       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2715       vals += ncols;
2716       cols += ncols;
2717     }
2718   }
2719   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2720   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2721 
2722   if (reuse == MAT_INITIAL_MATRIX) {
2723     *matredundant = C;
2724 
2725     /* create a supporting struct and attach it to C for reuse */
2726     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2727     if (subsize == 1) {
2728       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2729       c->redundant = redund;
2730     } else {
2731       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2732       c->redundant = redund;
2733     }
2734 
2735     redund->nzlocal   = nzlocal;
2736     redund->nsends    = nsends;
2737     redund->nrecvs    = nrecvs;
2738     redund->send_rank = send_rank;
2739     redund->recv_rank = recv_rank;
2740     redund->sbuf_nz   = sbuf_nz;
2741     redund->rbuf_nz   = rbuf_nz;
2742     redund->sbuf_j    = sbuf_j;
2743     redund->sbuf_a    = sbuf_a;
2744     redund->rbuf_j    = rbuf_j;
2745     redund->rbuf_a    = rbuf_a;
2746     redund->psubcomm  = NULL;
2747   }
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2753 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2754 {
2755   PetscErrorCode ierr;
2756   MPI_Comm       comm;
2757   PetscMPIInt    size,subsize;
2758   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2759   Mat_Redundant  *redund=NULL;
2760   PetscSubcomm   psubcomm=NULL;
2761   MPI_Comm       subcomm_in=subcomm;
2762   Mat            *matseq;
2763   IS             isrow,iscol;
2764 
2765   PetscFunctionBegin;
2766   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2767     if (reuse ==  MAT_INITIAL_MATRIX) {
2768       /* create psubcomm, then get subcomm */
2769       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2770       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2771       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2772 
2773       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2774       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2775       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2776       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2777       subcomm = psubcomm->comm;
2778     } else { /* retrieve psubcomm and subcomm */
2779       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2780       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2781       if (subsize == 1) {
2782         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2783         redund = c->redundant;
2784       } else {
2785         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2786         redund = c->redundant;
2787       }
2788       psubcomm = redund->psubcomm;
2789     }
2790     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2791       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2792       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2793         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2794         if (subsize == 1) {
2795           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2796           c->redundant->psubcomm = psubcomm;
2797         } else {
2798           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2799           c->redundant->psubcomm = psubcomm ;
2800         }
2801       }
2802       PetscFunctionReturn(0);
2803     }
2804   }
2805 
2806   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2807   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2808   if (reuse == MAT_INITIAL_MATRIX) {
2809     /* create a local sequential matrix matseq[0] */
2810     mloc_sub = PETSC_DECIDE;
2811     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2812     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2813     rstart = rend - mloc_sub;
2814     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2815     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2816   } else { /* reuse == MAT_REUSE_MATRIX */
2817     if (subsize == 1) {
2818       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2819       redund = c->redundant;
2820     } else {
2821       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2822       redund = c->redundant;
2823     }
2824 
2825     isrow  = redund->isrow;
2826     iscol  = redund->iscol;
2827     matseq = redund->matseq;
2828   }
2829   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2830   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2831 
2832   if (reuse == MAT_INITIAL_MATRIX) {
2833     /* create a supporting struct and attach it to C for reuse */
2834     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2835     if (subsize == 1) {
2836       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2837       c->redundant = redund;
2838     } else {
2839       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2840       c->redundant = redund;
2841     }
2842     redund->isrow    = isrow;
2843     redund->iscol    = iscol;
2844     redund->matseq   = matseq;
2845     redund->psubcomm = psubcomm;
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2852 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2853 {
2854   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2855   PetscErrorCode ierr;
2856   PetscInt       i,*idxb = 0;
2857   PetscScalar    *va,*vb;
2858   Vec            vtmp;
2859 
2860   PetscFunctionBegin;
2861   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2862   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2863   if (idx) {
2864     for (i=0; i<A->rmap->n; i++) {
2865       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2866     }
2867   }
2868 
2869   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2870   if (idx) {
2871     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2872   }
2873   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2874   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2875 
2876   for (i=0; i<A->rmap->n; i++) {
2877     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2878       va[i] = vb[i];
2879       if (idx) idx[i] = a->garray[idxb[i]];
2880     }
2881   }
2882 
2883   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2884   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2885   ierr = PetscFree(idxb);CHKERRQ(ierr);
2886   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2887   PetscFunctionReturn(0);
2888 }
2889 
2890 #undef __FUNCT__
2891 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2892 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2893 {
2894   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2895   PetscErrorCode ierr;
2896   PetscInt       i,*idxb = 0;
2897   PetscScalar    *va,*vb;
2898   Vec            vtmp;
2899 
2900   PetscFunctionBegin;
2901   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2902   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2903   if (idx) {
2904     for (i=0; i<A->cmap->n; i++) {
2905       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2906     }
2907   }
2908 
2909   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2910   if (idx) {
2911     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2912   }
2913   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2914   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2915 
2916   for (i=0; i<A->rmap->n; i++) {
2917     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2918       va[i] = vb[i];
2919       if (idx) idx[i] = a->garray[idxb[i]];
2920     }
2921   }
2922 
2923   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2924   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2925   ierr = PetscFree(idxb);CHKERRQ(ierr);
2926   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2932 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2933 {
2934   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2935   PetscInt       n      = A->rmap->n;
2936   PetscInt       cstart = A->cmap->rstart;
2937   PetscInt       *cmap  = mat->garray;
2938   PetscInt       *diagIdx, *offdiagIdx;
2939   Vec            diagV, offdiagV;
2940   PetscScalar    *a, *diagA, *offdiagA;
2941   PetscInt       r;
2942   PetscErrorCode ierr;
2943 
2944   PetscFunctionBegin;
2945   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2946   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2947   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2948   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2949   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2950   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2951   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2952   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2953   for (r = 0; r < n; ++r) {
2954     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2955       a[r]   = diagA[r];
2956       idx[r] = cstart + diagIdx[r];
2957     } else {
2958       a[r]   = offdiagA[r];
2959       idx[r] = cmap[offdiagIdx[r]];
2960     }
2961   }
2962   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2963   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2964   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2965   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2966   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2967   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 #undef __FUNCT__
2972 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2973 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2974 {
2975   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2976   PetscInt       n      = A->rmap->n;
2977   PetscInt       cstart = A->cmap->rstart;
2978   PetscInt       *cmap  = mat->garray;
2979   PetscInt       *diagIdx, *offdiagIdx;
2980   Vec            diagV, offdiagV;
2981   PetscScalar    *a, *diagA, *offdiagA;
2982   PetscInt       r;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2987   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2988   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2989   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2990   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2991   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2992   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2993   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2994   for (r = 0; r < n; ++r) {
2995     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2996       a[r]   = diagA[r];
2997       idx[r] = cstart + diagIdx[r];
2998     } else {
2999       a[r]   = offdiagA[r];
3000       idx[r] = cmap[offdiagIdx[r]];
3001     }
3002   }
3003   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3004   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3005   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3006   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3007   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3008   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 #undef __FUNCT__
3013 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3014 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3015 {
3016   PetscErrorCode ierr;
3017   Mat            *dummy;
3018 
3019   PetscFunctionBegin;
3020   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3021   *newmat = *dummy;
3022   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 
3026 #undef __FUNCT__
3027 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3028 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3029 {
3030   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3031   PetscErrorCode ierr;
3032 
3033   PetscFunctionBegin;
3034   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3035   PetscFunctionReturn(0);
3036 }
3037 
3038 #undef __FUNCT__
3039 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3040 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3041 {
3042   PetscErrorCode ierr;
3043   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3044 
3045   PetscFunctionBegin;
3046   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3047   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3048   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3049   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
3052 
3053 /* -------------------------------------------------------------------*/
/*
   MatOps_Values - Table of the MPIAIJ implementations of the Mat operations.

   The initializer is purely positional: each entry fills the next member of
   struct _MatOps, so entries must never be reordered, inserted or dropped without
   matching the struct declaration. A 0 entry leaves that slot empty. The numeric
   markers in the margin appear to give the zero-based slot number of the entry on
   the same line and exist only to help keep one's place.

   Note that two of the three PETSC_HAVE_PBGL conditionals below select 0 in both
   branches; only the first one (MatSolve_MPIAIJ) changes the table.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};
3210 
3211 /* ----------------------------------------------------------------------------------------*/
3212 
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3215 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3216 {
3217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3222   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3228 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3229 {
3230   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3235   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 #undef __FUNCT__
3240 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3241 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3242 {
3243   Mat_MPIAIJ     *b;
3244   PetscErrorCode ierr;
3245 
3246   PetscFunctionBegin;
3247   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3248   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3249   b = (Mat_MPIAIJ*)B->data;
3250 
3251   if (!B->preallocated) {
3252     /* Explicitly create 2 MATSEQAIJ matrices. */
3253     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3254     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3255     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3256     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3257     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3258     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3259     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3260     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3261     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3262     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3263   }
3264 
3265   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3266   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3267   B->preallocated = PETSC_TRUE;
3268   PetscFunctionReturn(0);
3269 }
3270 
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3273 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3274 {
3275   Mat            mat;
3276   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3277   PetscErrorCode ierr;
3278 
3279   PetscFunctionBegin;
3280   *newmat = 0;
3281   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3282   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3283   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3284   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3285   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3286   a       = (Mat_MPIAIJ*)mat->data;
3287 
3288   mat->factortype   = matin->factortype;
3289   mat->assembled    = PETSC_TRUE;
3290   mat->insertmode   = NOT_SET_VALUES;
3291   mat->preallocated = PETSC_TRUE;
3292 
3293   a->size         = oldmat->size;
3294   a->rank         = oldmat->rank;
3295   a->donotstash   = oldmat->donotstash;
3296   a->roworiented  = oldmat->roworiented;
3297   a->rowindices   = 0;
3298   a->rowvalues    = 0;
3299   a->getrowactive = PETSC_FALSE;
3300 
3301   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3302   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3303 
3304   if (oldmat->colmap) {
3305 #if defined(PETSC_USE_CTABLE)
3306     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3307 #else
3308     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3309     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3310     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3311 #endif
3312   } else a->colmap = 0;
3313   if (oldmat->garray) {
3314     PetscInt len;
3315     len  = oldmat->B->cmap->n;
3316     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3317     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3318     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3319   } else a->garray = 0;
3320 
3321   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3322   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3323   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3324   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3325   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3326   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3327   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3328   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3329   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3330   *newmat = mat;
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatLoad_MPIAIJ"
3338 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3339 {
3340   PetscScalar    *vals,*svals;
3341   MPI_Comm       comm;
3342   PetscErrorCode ierr;
3343   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3344   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3345   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3346   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3347   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3348   int            fd;
3349   PetscInt       bs = 1;
3350 
3351   PetscFunctionBegin;
3352   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3353   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3354   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3355   if (!rank) {
3356     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3357     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3358     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3359   }
3360 
3361   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3362   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3363   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3364 
3365   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3366 
3367   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3368   M    = header[1]; N = header[2];
3369   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3370   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3371   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3372 
3373   /* If global sizes are set, check if they are consistent with that given in the file */
3374   if (sizesset) {
3375     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3376   }
3377   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3378   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3379 
3380   /* determine ownership of all (block) rows */
3381   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3382   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3383   else m = newMat->rmap->n; /* Set by user */
3384 
3385   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3386   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3387 
3388   /* First process needs enough room for process with most rows */
3389   if (!rank) {
3390     mmax = rowners[1];
3391     for (i=2; i<=size; i++) {
3392       mmax = PetscMax(mmax, rowners[i]);
3393     }
3394   } else mmax = -1;             /* unused, but compilers complain */
3395 
3396   rowners[0] = 0;
3397   for (i=2; i<=size; i++) {
3398     rowners[i] += rowners[i-1];
3399   }
3400   rstart = rowners[rank];
3401   rend   = rowners[rank+1];
3402 
3403   /* distribute row lengths to all processors */
3404   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3405   if (!rank) {
3406     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3407     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3408     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3409     for (j=0; j<m; j++) {
3410       procsnz[0] += ourlens[j];
3411     }
3412     for (i=1; i<size; i++) {
3413       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3414       /* calculate the number of nonzeros on each processor */
3415       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3416         procsnz[i] += rowlengths[j];
3417       }
3418       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3419     }
3420     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3421   } else {
3422     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3423   }
3424 
3425   if (!rank) {
3426     /* determine max buffer needed and allocate it */
3427     maxnz = 0;
3428     for (i=0; i<size; i++) {
3429       maxnz = PetscMax(maxnz,procsnz[i]);
3430     }
3431     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3432 
3433     /* read in my part of the matrix column indices  */
3434     nz   = procsnz[0];
3435     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3436     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3437 
3438     /* read in every one elses and ship off */
3439     for (i=1; i<size; i++) {
3440       nz   = procsnz[i];
3441       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3442       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3443     }
3444     ierr = PetscFree(cols);CHKERRQ(ierr);
3445   } else {
3446     /* determine buffer space needed for message */
3447     nz = 0;
3448     for (i=0; i<m; i++) {
3449       nz += ourlens[i];
3450     }
3451     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3452 
3453     /* receive message of column indices*/
3454     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3455   }
3456 
3457   /* determine column ownership if matrix is not square */
3458   if (N != M) {
3459     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3460     else n = newMat->cmap->n;
3461     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3462     cstart = cend - n;
3463   } else {
3464     cstart = rstart;
3465     cend   = rend;
3466     n      = cend - cstart;
3467   }
3468 
3469   /* loop over local rows, determining number of off diagonal entries */
3470   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3471   jj   = 0;
3472   for (i=0; i<m; i++) {
3473     for (j=0; j<ourlens[i]; j++) {
3474       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3475       jj++;
3476     }
3477   }
3478 
3479   for (i=0; i<m; i++) {
3480     ourlens[i] -= offlens[i];
3481   }
3482   if (!sizesset) {
3483     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3484   }
3485 
3486   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3487 
3488   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3489 
3490   for (i=0; i<m; i++) {
3491     ourlens[i] += offlens[i];
3492   }
3493 
3494   if (!rank) {
3495     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3496 
3497     /* read in my part of the matrix numerical values  */
3498     nz   = procsnz[0];
3499     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3500 
3501     /* insert into matrix */
3502     jj      = rstart;
3503     smycols = mycols;
3504     svals   = vals;
3505     for (i=0; i<m; i++) {
3506       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3507       smycols += ourlens[i];
3508       svals   += ourlens[i];
3509       jj++;
3510     }
3511 
3512     /* read in other processors and ship out */
3513     for (i=1; i<size; i++) {
3514       nz   = procsnz[i];
3515       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3516       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3517     }
3518     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3519   } else {
3520     /* receive numeric values */
3521     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3522 
3523     /* receive message of values*/
3524     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3525 
3526     /* insert into matrix */
3527     jj      = rstart;
3528     smycols = mycols;
3529     svals   = vals;
3530     for (i=0; i<m; i++) {
3531       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3532       smycols += ourlens[i];
3533       svals   += ourlens[i];
3534       jj++;
3535     }
3536   }
3537   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3538   ierr = PetscFree(vals);CHKERRQ(ierr);
3539   ierr = PetscFree(mycols);CHKERRQ(ierr);
3540   ierr = PetscFree(rowners);CHKERRQ(ierr);
3541   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3542   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3548 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3549 {
3550   PetscErrorCode ierr;
3551   IS             iscol_local;
3552   PetscInt       csize;
3553 
3554   PetscFunctionBegin;
3555   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3556   if (call == MAT_REUSE_MATRIX) {
3557     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3558     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3559   } else {
3560     PetscInt cbs;
3561     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3562     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3563     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3564   }
3565   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3566   if (call == MAT_INITIAL_MATRIX) {
3567     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3568     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3569   }
3570   PetscFunctionReturn(0);
3571 }
3572 
3573 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3574 #undef __FUNCT__
3575 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3576 /*
    Not great since it makes two copies of the submatrix: first a SeqAIJ matrix
  held locally, and then the end result built by concatenating the local matrices.
3579   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3580 
3581   Note: This requires a sequential iscol with all indices.
3582 */
3583 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3584 {
3585   PetscErrorCode ierr;
3586   PetscMPIInt    rank,size;
3587   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3588   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3589   PetscBool      allcolumns, colflag;
3590   Mat            M,Mreuse;
3591   MatScalar      *vwork,*aa;
3592   MPI_Comm       comm;
3593   Mat_SeqAIJ     *aij;
3594 
3595   PetscFunctionBegin;
3596   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3597   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3599 
3600   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3601   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3602   if (colflag && ncol == mat->cmap->N) {
3603     allcolumns = PETSC_TRUE;
3604   } else {
3605     allcolumns = PETSC_FALSE;
3606   }
3607   if (call ==  MAT_REUSE_MATRIX) {
3608     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3609     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3610     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3611   } else {
3612     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3613   }
3614 
3615   /*
3616       m - number of local rows
3617       n - number of columns (same on all processors)
3618       rstart - first row in new global matrix generated
3619   */
3620   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3621   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3622   if (call == MAT_INITIAL_MATRIX) {
3623     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3624     ii  = aij->i;
3625     jj  = aij->j;
3626 
3627     /*
3628         Determine the number of non-zeros in the diagonal and off-diagonal
3629         portions of the matrix in order to do correct preallocation
3630     */
3631 
3632     /* first get start and end of "diagonal" columns */
3633     if (csize == PETSC_DECIDE) {
3634       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3635       if (mglobal == n) { /* square matrix */
3636         nlocal = m;
3637       } else {
3638         nlocal = n/size + ((n % size) > rank);
3639       }
3640     } else {
3641       nlocal = csize;
3642     }
3643     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3644     rstart = rend - nlocal;
3645     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3646 
3647     /* next, compute all the lengths */
3648     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3649     olens = dlens + m;
3650     for (i=0; i<m; i++) {
3651       jend = ii[i+1] - ii[i];
3652       olen = 0;
3653       dlen = 0;
3654       for (j=0; j<jend; j++) {
3655         if (*jj < rstart || *jj >= rend) olen++;
3656         else dlen++;
3657         jj++;
3658       }
3659       olens[i] = olen;
3660       dlens[i] = dlen;
3661     }
3662     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3663     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3664     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3665     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3666     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3667     ierr = PetscFree(dlens);CHKERRQ(ierr);
3668   } else {
3669     PetscInt ml,nl;
3670 
3671     M    = *newmat;
3672     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3673     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3674     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3675     /*
3676          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3677        rather than the slower MatSetValues().
3678     */
3679     M->was_assembled = PETSC_TRUE;
3680     M->assembled     = PETSC_FALSE;
3681   }
3682   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3683   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3684   ii   = aij->i;
3685   jj   = aij->j;
3686   aa   = aij->a;
3687   for (i=0; i<m; i++) {
3688     row   = rstart + i;
3689     nz    = ii[i+1] - ii[i];
3690     cwork = jj;     jj += nz;
3691     vwork = aa;     aa += nz;
3692     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3693   }
3694 
3695   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3696   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3697   *newmat = M;
3698 
3699   /* save submatrix used in processor for next request */
3700   if (call ==  MAT_INITIAL_MATRIX) {
3701     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3702     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3703   }
3704   PetscFunctionReturn(0);
3705 }
3706 
3707 #undef __FUNCT__
3708 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3709 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3710 {
3711   PetscInt       m,cstart, cend,j,nnz,i,d;
3712   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3713   const PetscInt *JJ;
3714   PetscScalar    *values;
3715   PetscErrorCode ierr;
3716 
3717   PetscFunctionBegin;
3718   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3719 
3720   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3721   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3722   m      = B->rmap->n;
3723   cstart = B->cmap->rstart;
3724   cend   = B->cmap->rend;
3725   rstart = B->rmap->rstart;
3726 
3727   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3728 
3729 #if defined(PETSC_USE_DEBUGGING)
3730   for (i=0; i<m; i++) {
3731     nnz = Ii[i+1]- Ii[i];
3732     JJ  = J + Ii[i];
3733     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3734     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3735     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3736   }
3737 #endif
3738 
3739   for (i=0; i<m; i++) {
3740     nnz     = Ii[i+1]- Ii[i];
3741     JJ      = J + Ii[i];
3742     nnz_max = PetscMax(nnz_max,nnz);
3743     d       = 0;
3744     for (j=0; j<nnz; j++) {
3745       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3746     }
3747     d_nnz[i] = d;
3748     o_nnz[i] = nnz - d;
3749   }
3750   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3751   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3752 
3753   if (v) values = (PetscScalar*)v;
3754   else {
3755     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3756   }
3757 
3758   for (i=0; i<m; i++) {
3759     ii   = i + rstart;
3760     nnz  = Ii[i+1]- Ii[i];
3761     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3762   }
3763   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3764   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3765 
3766   if (!v) {
3767     ierr = PetscFree(values);CHKERRQ(ierr);
3768   }
3769   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3770   PetscFunctionReturn(0);
3771 }
3772 
3773 #undef __FUNCT__
3774 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3775 /*@
3776    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3777    (the default parallel PETSc format).
3778 
3779    Collective on MPI_Comm
3780 
3781    Input Parameters:
3782 +  B - the matrix
3783 .  i - the indices into j for the start of each local row (starts with zero)
3784 .  j - the column indices for each local row (starts with zero)
3785 -  v - optional values in the matrix
3786 
3787    Level: developer
3788 
3789    Notes:
3790        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3791      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3792      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3793 
3794        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3795 
3796        The format which is used for the sparse matrix input, is equivalent to a
3797     row-major ordering.. i.e for the following matrix, the input data expected is
3798     as shown:
3799 
3800         1 0 0
3801         2 0 3     P0
3802        -------
3803         4 5 6     P1
3804 
3805      Process0 [P0]: rows_owned=[0,1]
3806         i =  {0,1,3}  [size = nrow+1  = 2+1]
3807         j =  {0,0,2}  [size = 3]
3808         v =  {1,2,3}  [size = 3]
3809 
3810      Process1 [P1]: rows_owned=[2]
3811         i =  {0,3}    [size = nrow+1  = 1+1]
3812         j =  {0,1,2}  [size = 3]
3813         v =  {4,5,6}  [size = 3]
3814 
3815 .keywords: matrix, aij, compressed row, sparse, parallel
3816 
3817 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3818           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3819 @*/
3820 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3821 {
3822   PetscErrorCode ierr;
3823 
3824   PetscFunctionBegin;
3825   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3826   PetscFunctionReturn(0);
3827 }
3828 
3829 #undef __FUNCT__
3830 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3831 /*@C
3832    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3833    (the default parallel PETSc format).  For good matrix assembly performance
3834    the user should preallocate the matrix storage by setting the parameters
3835    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3836    performance can be increased by more than a factor of 50.
3837 
3838    Collective on MPI_Comm
3839 
3840    Input Parameters:
3841 +  B - the matrix
3842 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3843            (same value is used for all local rows)
3844 .  d_nnz - array containing the number of nonzeros in the various rows of the
3845            DIAGONAL portion of the local submatrix (possibly different for each row)
3846            or NULL, if d_nz is used to specify the nonzero structure.
3847            The size of this array is equal to the number of local rows, i.e 'm'.
3848            For matrices that will be factored, you must leave room for (and set)
3849            the diagonal entry even if it is zero.
3850 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3851            submatrix (same value is used for all local rows).
3852 -  o_nnz - array containing the number of nonzeros in the various rows of the
3853            OFF-DIAGONAL portion of the local submatrix (possibly different for
3854            each row) or NULL, if o_nz is used to specify the nonzero
3855            structure. The size of this array is equal to the number
3856            of local rows, i.e 'm'.
3857 
3858    If the *_nnz parameter is given then the *_nz parameter is ignored
3859 
3860    The AIJ format (also called the Yale sparse matrix format or
3861    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3862    storage.  The stored row and column indices begin with zero.
3863    See Users-Manual: ch_mat for details.
3864 
3865    The parallel matrix is partitioned such that the first m0 rows belong to
3866    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3867    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3868 
3869    The DIAGONAL portion of the local submatrix of a processor can be defined
3870    as the submatrix which is obtained by extracting the part corresponding to
3871    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3872    first row that belongs to the processor, r2 is the last row belonging to
3873    this processor, and c1-c2 is the range of indices of the local part of a
3874    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3875    common case of a square matrix, the row and column ranges are the same and
3876    the DIAGONAL part is also square. The remaining portion of the local
3877    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3878 
3879    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3880 
3881    You can call MatGetInfo() to get information on how effective the preallocation was;
3882    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3883    You can also run with the option -info and look for messages with the string
3884    malloc in them to see if additional memory allocation was needed.
3885 
3886    Example usage:
3887 
3888    Consider the following 8x8 matrix with 34 non-zero values, that is
3889    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3890    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3891    as follows:
3892 
3893 .vb
3894             1  2  0  |  0  3  0  |  0  4
3895     Proc0   0  5  6  |  7  0  0  |  8  0
3896             9  0 10  | 11  0  0  | 12  0
3897     -------------------------------------
3898            13  0 14  | 15 16 17  |  0  0
3899     Proc1   0 18  0  | 19 20 21  |  0  0
3900             0  0  0  | 22 23  0  | 24  0
3901     -------------------------------------
3902     Proc2  25 26 27  |  0  0 28  | 29  0
3903            30  0  0  | 31 32 33  |  0 34
3904 .ve
3905 
3906    This can be represented as a collection of submatrices as:
3907 
3908 .vb
3909       A B C
3910       D E F
3911       G H I
3912 .ve
3913 
3914    Where the submatrices A,B,C are owned by proc0, D,E,F are
3915    owned by proc1, G,H,I are owned by proc2.
3916 
3917    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3918    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3919    The 'M','N' parameters are 8,8, and have the same values on all procs.
3920 
3921    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3922    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3923    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3924    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3925    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3926    matrix, and [DF] as another SeqAIJ matrix.
3927 
3928    When d_nz, o_nz parameters are specified, d_nz storage elements are
3929    allocated for every row of the local diagonal submatrix, and o_nz
3930    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3931    One way to choose d_nz and o_nz is to use the max nonzeros per local
3932    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3933    In this case, the values of d_nz,o_nz are:
3934 .vb
3935      proc0 : dnz = 2, o_nz = 2
3936      proc1 : dnz = 3, o_nz = 2
3937      proc2 : dnz = 1, o_nz = 4
3938 .ve
3939    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3940    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3941    for proc2. i.e we are using 12+15+10=37 storage locations to store
3942    34 values.
3943 
3944    When d_nnz, o_nnz parameters are specified, the storage is specified
3945    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3946    In the above case the values for d_nnz,o_nnz are:
3947 .vb
3948      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3949      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3950      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3951 .ve
3952    Here the space allocated is sum of all the above values i.e 34, and
3953    hence pre-allocation is perfect.
3954 
3955    Level: intermediate
3956 
3957 .keywords: matrix, aij, compressed row, sparse, parallel
3958 
3959 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3960           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3961 @*/
3962 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3963 {
3964   PetscErrorCode ierr;
3965 
3966   PetscFunctionBegin;
3967   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3968   PetscValidType(B,1);
3969   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3970   PetscFunctionReturn(0);
3971 }
3972 
3973 #undef __FUNCT__
3974 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3975 /*@
3976      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3977          CSR format the local rows.
3978 
3979    Collective on MPI_Comm
3980 
3981    Input Parameters:
3982 +  comm - MPI communicator
3983 .  m - number of local rows (Cannot be PETSC_DECIDE)
3984 .  n - This value should be the same as the local size used in creating the
3985        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3986        calculated if N is given) For square matrices n is almost always m.
3987 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3988 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3989 .   i - row indices
3990 .   j - column indices
3991 -   a - matrix values
3992 
3993    Output Parameter:
3994 .   mat - the matrix
3995 
3996    Level: intermediate
3997 
3998    Notes:
3999        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4000      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4001      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4002 
4003        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4004 
4005        The format which is used for the sparse matrix input, is equivalent to a
4006     row-major ordering.. i.e for the following matrix, the input data expected is
4007     as shown:
4008 
4009         1 0 0
4010         2 0 3     P0
4011        -------
4012         4 5 6     P1
4013 
4014      Process0 [P0]: rows_owned=[0,1]
4015         i =  {0,1,3}  [size = nrow+1  = 2+1]
4016         j =  {0,0,2}  [size = 3]
4017         a =  {1,2,3}  [size = 3]
4018 
4019      Process1 [P1]: rows_owned=[2]
4020         i =  {0,3}    [size = nrow+1  = 1+1]
4021         j =  {0,1,2}  [size = 3]
4022         a =  {4,5,6}  [size = 3]
4023 
4024 .keywords: matrix, aij, compressed row, sparse, parallel
4025 
4026 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4027           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4028 @*/
4029 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4030 {
4031   PetscErrorCode ierr;
4032 
4033   PetscFunctionBegin;
4034   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4035   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4036   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4037   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4038   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4039   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4040   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4041   PetscFunctionReturn(0);
4042 }
4043 
4044 #undef __FUNCT__
4045 #define __FUNCT__ "MatCreateAIJ"
4046 /*@C
4047    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4048    (the default parallel PETSc format).  For good matrix assembly performance
4049    the user should preallocate the matrix storage by setting the parameters
4050    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4051    performance can be increased by more than a factor of 50.
4052 
4053    Collective on MPI_Comm
4054 
4055    Input Parameters:
4056 +  comm - MPI communicator
4057 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4058            This value should be the same as the local size used in creating the
4059            y vector for the matrix-vector product y = Ax.
4060 .  n - This value should be the same as the local size used in creating the
4061        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4062        calculated if N is given) For square matrices n is almost always m.
4063 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4064 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4065 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4066            (same value is used for all local rows)
4067 .  d_nnz - array containing the number of nonzeros in the various rows of the
4068            DIAGONAL portion of the local submatrix (possibly different for each row)
4069            or NULL, if d_nz is used to specify the nonzero structure.
4070            The size of this array is equal to the number of local rows, i.e 'm'.
4071 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4072            submatrix (same value is used for all local rows).
4073 -  o_nnz - array containing the number of nonzeros in the various rows of the
4074            OFF-DIAGONAL portion of the local submatrix (possibly different for
4075            each row) or NULL, if o_nz is used to specify the nonzero
4076            structure. The size of this array is equal to the number
4077            of local rows, i.e 'm'.
4078 
4079    Output Parameter:
4080 .  A - the matrix
4081 
4082    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4083    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4084    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4085 
4086    Notes:
4087    If the *_nnz parameter is given then the *_nz parameter is ignored
4088 
4089    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4090    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4091    storage requirements for this matrix.
4092 
4093    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4094    processor then it must be used on all processors that share the object for
4095    that argument.
4096 
4097    The user MUST specify either the local or global matrix dimensions
4098    (possibly both).
4099 
4100    The parallel matrix is partitioned across processors such that the
4101    first m0 rows belong to process 0, the next m1 rows belong to
4102    process 1, the next m2 rows belong to process 2 etc.. where
4103    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4104    values corresponding to [m x N] submatrix.
4105 
4106    The columns are logically partitioned with the n0 columns belonging
4107    to 0th partition, the next n1 columns belonging to the next
4108    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4109 
4110    The DIAGONAL portion of the local submatrix on any given processor
4111    is the submatrix corresponding to the rows and columns m,n
4112    corresponding to the given processor. i.e diagonal matrix on
4113    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4114    etc. The remaining portion of the local submatrix [m x (N-n)]
4115    constitute the OFF-DIAGONAL portion. The example below better
4116    illustrates this concept.
4117 
4118    For a square global matrix we define each processor's diagonal portion
4119    to be its local rows and the corresponding columns (a square submatrix);
4120    each processor's off-diagonal portion encompasses the remainder of the
4121    local matrix (a rectangular submatrix).
4122 
4123    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4124 
4125    When calling this routine with a single process communicator, a matrix of
4126    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4127    type of communicator, use the construction mechanism:
4128      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4129 
4130    By default, this format uses inodes (identical nodes) when possible.
4131    We search for consecutive rows with the same nonzero structure, thereby
4132    reusing matrix information to achieve increased efficiency.
4133 
4134    Options Database Keys:
4135 +  -mat_no_inode  - Do not use inodes
4136 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4137 -  -mat_aij_oneindex - Internally use indexing starting at 1
4138         rather than 0.  Note that when calling MatSetValues(),
4139         the user still MUST index entries starting at 0!
4140 
4141 
4142    Example usage:
4143 
4144    Consider the following 8x8 matrix with 34 non-zero values, that is
4145    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4146    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4147    as follows:
4148 
4149 .vb
4150             1  2  0  |  0  3  0  |  0  4
4151     Proc0   0  5  6  |  7  0  0  |  8  0
4152             9  0 10  | 11  0  0  | 12  0
4153     -------------------------------------
4154            13  0 14  | 15 16 17  |  0  0
4155     Proc1   0 18  0  | 19 20 21  |  0  0
4156             0  0  0  | 22 23  0  | 24  0
4157     -------------------------------------
4158     Proc2  25 26 27  |  0  0 28  | 29  0
4159            30  0  0  | 31 32 33  |  0 34
4160 .ve
4161 
4162    This can be represented as a collection of submatrices as:
4163 
4164 .vb
4165       A B C
4166       D E F
4167       G H I
4168 .ve
4169 
4170    Where the submatrices A,B,C are owned by proc0, D,E,F are
4171    owned by proc1, G,H,I are owned by proc2.
4172 
4173    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4174    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4175    The 'M','N' parameters are 8,8, and have the same values on all procs.
4176 
4177    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4178    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4179    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4180    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4181    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4182    matrix, and [DF] as another SeqAIJ matrix.
4183 
4184    When d_nz, o_nz parameters are specified, d_nz storage elements are
4185    allocated for every row of the local diagonal submatrix, and o_nz
4186    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4187    One way to choose d_nz and o_nz is to use the max nonzeros per local
4188    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4189    In this case, the values of d_nz,o_nz are:
4190 .vb
4191      proc0 : dnz = 2, o_nz = 2
4192      proc1 : dnz = 3, o_nz = 2
4193      proc2 : dnz = 1, o_nz = 4
4194 .ve
4195    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4196    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4197    for proc2. i.e we are using 12+15+10=37 storage locations to store
4198    34 values.
4199 
4200    When d_nnz, o_nnz parameters are specified, the storage is specified
4201    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4202    In the above case the values for d_nnz,o_nnz are:
4203 .vb
4204      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4205      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4206      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4207 .ve
4208    Here the space allocated is sum of all the above values i.e 34, and
4209    hence pre-allocation is perfect.
4210 
4211    Level: intermediate
4212 
4213 .keywords: matrix, aij, compressed row, sparse, parallel
4214 
4215 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4216           MPIAIJ, MatCreateMPIAIJWithArrays()
4217 @*/
4218 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4219 {
4220   PetscErrorCode ierr;
4221   PetscMPIInt    size;
4222 
4223   PetscFunctionBegin;
4224   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4225   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4226   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4227   if (size > 1) {
4228     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4229     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4230   } else {
4231     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4232     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4233   }
4234   PetscFunctionReturn(0);
4235 }
4236 
4237 #undef __FUNCT__
4238 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4239 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4240 {
4241   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4242 
4243   PetscFunctionBegin;
4244   if (Ad)     *Ad     = a->A;
4245   if (Ao)     *Ao     = a->B;
4246   if (colmap) *colmap = a->garray;
4247   PetscFunctionReturn(0);
4248 }
4249 
4250 #undef __FUNCT__
4251 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4252 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4253 {
4254   PetscErrorCode ierr;
4255   PetscInt       i;
4256   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4257 
4258   PetscFunctionBegin;
4259   if (coloring->ctype == IS_COLORING_GLOBAL) {
4260     ISColoringValue *allcolors,*colors;
4261     ISColoring      ocoloring;
4262 
4263     /* set coloring for diagonal portion */
4264     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4265 
4266     /* set coloring for off-diagonal portion */
4267     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4268     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4269     for (i=0; i<a->B->cmap->n; i++) {
4270       colors[i] = allcolors[a->garray[i]];
4271     }
4272     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4273     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4274     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4275     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4276   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4277     ISColoringValue *colors;
4278     PetscInt        *larray;
4279     ISColoring      ocoloring;
4280 
4281     /* set coloring for diagonal portion */
4282     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4283     for (i=0; i<a->A->cmap->n; i++) {
4284       larray[i] = i + A->cmap->rstart;
4285     }
4286     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4287     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4288     for (i=0; i<a->A->cmap->n; i++) {
4289       colors[i] = coloring->colors[larray[i]];
4290     }
4291     ierr = PetscFree(larray);CHKERRQ(ierr);
4292     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4293     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4294     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4295 
4296     /* set coloring for off-diagonal portion */
4297     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4298     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4299     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4300     for (i=0; i<a->B->cmap->n; i++) {
4301       colors[i] = coloring->colors[larray[i]];
4302     }
4303     ierr = PetscFree(larray);CHKERRQ(ierr);
4304     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4305     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4306     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4307   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4313 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4314 {
4315   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4316   PetscErrorCode ierr;
4317 
4318   PetscFunctionBegin;
4319   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4320   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4321   PetscFunctionReturn(0);
4322 }
4323 
4324 #undef __FUNCT__
4325 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4326 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4327 {
4328   PetscErrorCode ierr;
4329   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4330   PetscInt       *indx;
4331 
4332   PetscFunctionBegin;
4333   /* This routine will ONLY return MPIAIJ type matrix */
4334   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4335   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4336   if (n == PETSC_DECIDE) {
4337     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4338   }
4339   /* Check sum(n) = N */
4340   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4341   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4342 
4343   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4344   rstart -= m;
4345 
4346   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4347   for (i=0; i<m; i++) {
4348     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4349     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4350     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4351   }
4352 
4353   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4354   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4355   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4356   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4357   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4358   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4359   PetscFunctionReturn(0);
4360 }
4361 
4362 #undef __FUNCT__
4363 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4364 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4365 {
4366   PetscErrorCode ierr;
4367   PetscInt       m,N,i,rstart,nnz,Ii;
4368   PetscInt       *indx;
4369   PetscScalar    *values;
4370 
4371   PetscFunctionBegin;
4372   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4373   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4374   for (i=0; i<m; i++) {
4375     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4376     Ii   = i + rstart;
4377     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4378     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4379   }
4380   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4381   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4382   PetscFunctionReturn(0);
4383 }
4384 
4385 #undef __FUNCT__
4386 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4387 /*@
4388       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4389                  matrices from each processor
4390 
4391     Collective on MPI_Comm
4392 
4393    Input Parameters:
4394 +    comm - the communicators the parallel matrix will live on
4395 .    inmat - the input sequential matrices
4396 .    n - number of local columns (or PETSC_DECIDE)
4397 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4398 
4399    Output Parameter:
4400 .    outmat - the parallel matrix generated
4401 
4402     Level: advanced
4403 
4404    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4405 
4406 @*/
4407 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4408 {
4409   PetscErrorCode ierr;
4410   PetscMPIInt    size;
4411 
4412   PetscFunctionBegin;
4413   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4414   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4415   if (size == 1) {
4416     if (scall == MAT_INITIAL_MATRIX) {
4417       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4418     } else {
4419       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4420     }
4421   } else {
4422     if (scall == MAT_INITIAL_MATRIX) {
4423       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4424     }
4425     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4426   }
4427   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 #undef __FUNCT__
4432 #define __FUNCT__ "MatFileSplit"
4433 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4434 {
4435   PetscErrorCode    ierr;
4436   PetscMPIInt       rank;
4437   PetscInt          m,N,i,rstart,nnz;
4438   size_t            len;
4439   const PetscInt    *indx;
4440   PetscViewer       out;
4441   char              *name;
4442   Mat               B;
4443   const PetscScalar *values;
4444 
4445   PetscFunctionBegin;
4446   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4447   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4448   /* Should this be the type of the diagonal block of A? */
4449   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4450   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4451   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4452   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4453   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4454   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4455   for (i=0; i<m; i++) {
4456     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4457     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4458     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4459   }
4460   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4461   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4462 
4463   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4464   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4465   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4466   sprintf(name,"%s.%d",outfile,rank);
4467   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4468   ierr = PetscFree(name);CHKERRQ(ierr);
4469   ierr = MatView(B,out);CHKERRQ(ierr);
4470   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4471   ierr = MatDestroy(&B);CHKERRQ(ierr);
4472   PetscFunctionReturn(0);
4473 }
4474 
4475 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4476 #undef __FUNCT__
4477 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4478 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4479 {
4480   PetscErrorCode      ierr;
4481   Mat_Merge_SeqsToMPI *merge;
4482   PetscContainer      container;
4483 
4484   PetscFunctionBegin;
4485   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4486   if (container) {
4487     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4489     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4490     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4491     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4492     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4493     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4494     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4500     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4501     ierr = PetscFree(merge);CHKERRQ(ierr);
4502     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4503   }
4504   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4505   PetscFunctionReturn(0);
4506 }
4507 
4508 #include <../src/mat/utils/freespace.h>
4509 #include <petscbt.h>
4510 
4511 #undef __FUNCT__
4512 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4513 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4514 {
4515   PetscErrorCode      ierr;
4516   MPI_Comm            comm;
4517   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4518   PetscMPIInt         size,rank,taga,*len_s;
4519   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4520   PetscInt            proc,m;
4521   PetscInt            **buf_ri,**buf_rj;
4522   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4523   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4524   MPI_Request         *s_waits,*r_waits;
4525   MPI_Status          *status;
4526   MatScalar           *aa=a->a;
4527   MatScalar           **abuf_r,*ba_i;
4528   Mat_Merge_SeqsToMPI *merge;
4529   PetscContainer      container;
4530 
4531   PetscFunctionBegin;
4532   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4533   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4534 
4535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4536   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4537 
4538   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4539   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4540 
4541   bi     = merge->bi;
4542   bj     = merge->bj;
4543   buf_ri = merge->buf_ri;
4544   buf_rj = merge->buf_rj;
4545 
4546   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4547   owners = merge->rowmap->range;
4548   len_s  = merge->len_s;
4549 
4550   /* send and recv matrix values */
4551   /*-----------------------------*/
4552   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4553   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4554 
4555   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4556   for (proc=0,k=0; proc<size; proc++) {
4557     if (!len_s[proc]) continue;
4558     i    = owners[proc];
4559     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4560     k++;
4561   }
4562 
4563   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4564   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4565   ierr = PetscFree(status);CHKERRQ(ierr);
4566 
4567   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4568   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4569 
4570   /* insert mat values of mpimat */
4571   /*----------------------------*/
4572   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4573   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4574 
4575   for (k=0; k<merge->nrecv; k++) {
4576     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4577     nrows       = *(buf_ri_k[k]);
4578     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4579     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4580   }
4581 
4582   /* set values of ba */
4583   m = merge->rowmap->n;
4584   for (i=0; i<m; i++) {
4585     arow = owners[rank] + i;
4586     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4587     bnzi = bi[i+1] - bi[i];
4588     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4589 
4590     /* add local non-zero vals of this proc's seqmat into ba */
4591     anzi   = ai[arow+1] - ai[arow];
4592     aj     = a->j + ai[arow];
4593     aa     = a->a + ai[arow];
4594     nextaj = 0;
4595     for (j=0; nextaj<anzi; j++) {
4596       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4597         ba_i[j] += aa[nextaj++];
4598       }
4599     }
4600 
4601     /* add received vals into ba */
4602     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4603       /* i-th row */
4604       if (i == *nextrow[k]) {
4605         anzi   = *(nextai[k]+1) - *nextai[k];
4606         aj     = buf_rj[k] + *(nextai[k]);
4607         aa     = abuf_r[k] + *(nextai[k]);
4608         nextaj = 0;
4609         for (j=0; nextaj<anzi; j++) {
4610           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4611             ba_i[j] += aa[nextaj++];
4612           }
4613         }
4614         nextrow[k]++; nextai[k]++;
4615       }
4616     }
4617     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4618   }
4619   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4620   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4621 
4622   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4623   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4624   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4625   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4626   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4627   PetscFunctionReturn(0);
4628 }
4629 
4630 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4631 
4632 #undef __FUNCT__
4633 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4634 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4635 {
4636   PetscErrorCode      ierr;
4637   Mat                 B_mpi;
4638   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4639   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4640   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4641   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4642   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4643   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4644   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4645   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4646   MPI_Status          *status;
4647   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4648   PetscBT             lnkbt;
4649   Mat_Merge_SeqsToMPI *merge;
4650   PetscContainer      container;
4651 
4652   PetscFunctionBegin;
4653   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4654 
4655   /* make sure it is a PETSc comm */
4656   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4657   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4658   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4659 
4660   ierr = PetscNew(&merge);CHKERRQ(ierr);
4661   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4662 
4663   /* determine row ownership */
4664   /*---------------------------------------------------------*/
4665   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4666   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4667   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4668   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4669   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4670   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4671   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4672 
4673   m      = merge->rowmap->n;
4674   owners = merge->rowmap->range;
4675 
4676   /* determine the number of messages to send, their lengths */
4677   /*---------------------------------------------------------*/
4678   len_s = merge->len_s;
4679 
4680   len          = 0; /* length of buf_si[] */
4681   merge->nsend = 0;
4682   for (proc=0; proc<size; proc++) {
4683     len_si[proc] = 0;
4684     if (proc == rank) {
4685       len_s[proc] = 0;
4686     } else {
4687       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4688       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4689     }
4690     if (len_s[proc]) {
4691       merge->nsend++;
4692       nrows = 0;
4693       for (i=owners[proc]; i<owners[proc+1]; i++) {
4694         if (ai[i+1] > ai[i]) nrows++;
4695       }
4696       len_si[proc] = 2*(nrows+1);
4697       len         += len_si[proc];
4698     }
4699   }
4700 
4701   /* determine the number and length of messages to receive for ij-structure */
4702   /*-------------------------------------------------------------------------*/
4703   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4704   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4705 
4706   /* post the Irecv of j-structure */
4707   /*-------------------------------*/
4708   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4709   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4710 
4711   /* post the Isend of j-structure */
4712   /*--------------------------------*/
4713   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4714 
4715   for (proc=0, k=0; proc<size; proc++) {
4716     if (!len_s[proc]) continue;
4717     i    = owners[proc];
4718     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4719     k++;
4720   }
4721 
4722   /* receives and sends of j-structure are complete */
4723   /*------------------------------------------------*/
4724   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4725   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4726 
4727   /* send and recv i-structure */
4728   /*---------------------------*/
4729   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4730   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4731 
4732   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4733   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4734   for (proc=0,k=0; proc<size; proc++) {
4735     if (!len_s[proc]) continue;
4736     /* form outgoing message for i-structure:
4737          buf_si[0]:                 nrows to be sent
4738                [1:nrows]:           row index (global)
4739                [nrows+1:2*nrows+1]: i-structure index
4740     */
4741     /*-------------------------------------------*/
4742     nrows       = len_si[proc]/2 - 1;
4743     buf_si_i    = buf_si + nrows+1;
4744     buf_si[0]   = nrows;
4745     buf_si_i[0] = 0;
4746     nrows       = 0;
4747     for (i=owners[proc]; i<owners[proc+1]; i++) {
4748       anzi = ai[i+1] - ai[i];
4749       if (anzi) {
4750         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4751         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4752         nrows++;
4753       }
4754     }
4755     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4756     k++;
4757     buf_si += len_si[proc];
4758   }
4759 
4760   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4761   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4762 
4763   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4764   for (i=0; i<merge->nrecv; i++) {
4765     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4766   }
4767 
4768   ierr = PetscFree(len_si);CHKERRQ(ierr);
4769   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4770   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4771   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4772   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4773   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4774   ierr = PetscFree(status);CHKERRQ(ierr);
4775 
4776   /* compute a local seq matrix in each processor */
4777   /*----------------------------------------------*/
4778   /* allocate bi array and free space for accumulating nonzero column info */
4779   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4780   bi[0] = 0;
4781 
4782   /* create and initialize a linked list */
4783   nlnk = N+1;
4784   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4785 
4786   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4787   len  = ai[owners[rank+1]] - ai[owners[rank]];
4788   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4789 
4790   current_space = free_space;
4791 
4792   /* determine symbolic info for each local row */
4793   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4794 
4795   for (k=0; k<merge->nrecv; k++) {
4796     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4797     nrows       = *buf_ri_k[k];
4798     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4799     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4800   }
4801 
4802   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4803   len  = 0;
4804   for (i=0; i<m; i++) {
4805     bnzi = 0;
4806     /* add local non-zero cols of this proc's seqmat into lnk */
4807     arow  = owners[rank] + i;
4808     anzi  = ai[arow+1] - ai[arow];
4809     aj    = a->j + ai[arow];
4810     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4811     bnzi += nlnk;
4812     /* add received col data into lnk */
4813     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4814       if (i == *nextrow[k]) { /* i-th row */
4815         anzi  = *(nextai[k]+1) - *nextai[k];
4816         aj    = buf_rj[k] + *nextai[k];
4817         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818         bnzi += nlnk;
4819         nextrow[k]++; nextai[k]++;
4820       }
4821     }
4822     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4823 
4824     /* if free space is not available, make more free space */
4825     if (current_space->local_remaining<bnzi) {
4826       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4827       nspacedouble++;
4828     }
4829     /* copy data into free space, then initialize lnk */
4830     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4831     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4832 
4833     current_space->array           += bnzi;
4834     current_space->local_used      += bnzi;
4835     current_space->local_remaining -= bnzi;
4836 
4837     bi[i+1] = bi[i] + bnzi;
4838   }
4839 
4840   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4841 
4842   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4843   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4844   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4845 
4846   /* create symbolic parallel matrix B_mpi */
4847   /*---------------------------------------*/
4848   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4849   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4850   if (n==PETSC_DECIDE) {
4851     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4852   } else {
4853     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4854   }
4855   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4856   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4857   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4858   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4859   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4860 
4861   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4862   B_mpi->assembled    = PETSC_FALSE;
4863   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4864   merge->bi           = bi;
4865   merge->bj           = bj;
4866   merge->buf_ri       = buf_ri;
4867   merge->buf_rj       = buf_rj;
4868   merge->coi          = NULL;
4869   merge->coj          = NULL;
4870   merge->owners_co    = NULL;
4871 
4872   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4873 
4874   /* attach the supporting struct to B_mpi for reuse */
4875   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4876   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4877   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4878   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4879   *mpimat = B_mpi;
4880 
4881   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4882   PetscFunctionReturn(0);
4883 }
4884 
4885 #undef __FUNCT__
4886 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4887 /*@C
4888       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4889                  matrices from each processor
4890 
4891     Collective on MPI_Comm
4892 
4893    Input Parameters:
4894 +    comm - the communicators the parallel matrix will live on
4895 .    seqmat - the input sequential matrices
4896 .    m - number of local rows (or PETSC_DECIDE)
4897 .    n - number of local columns (or PETSC_DECIDE)
4898 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4899 
4900    Output Parameter:
4901 .    mpimat - the parallel matrix generated
4902 
4903     Level: advanced
4904 
4905    Notes:
4906      The dimensions of the sequential matrix in each processor MUST be the same.
4907      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4908      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4909 @*/
4910 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4911 {
4912   PetscErrorCode ierr;
4913   PetscMPIInt    size;
4914 
4915   PetscFunctionBegin;
4916   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4917   if (size == 1) {
4918     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4919     if (scall == MAT_INITIAL_MATRIX) {
4920       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4921     } else {
4922       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4923     }
4924     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4925     PetscFunctionReturn(0);
4926   }
4927   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4928   if (scall == MAT_INITIAL_MATRIX) {
4929     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4930   }
4931   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4932   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4933   PetscFunctionReturn(0);
4934 }
4935 
4936 #undef __FUNCT__
4937 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4938 /*@
4939      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4940           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4941           with MatGetSize()
4942 
4943     Not Collective
4944 
4945    Input Parameters:
4946 +    A - the matrix
4947 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4948 
4949    Output Parameter:
4950 .    A_loc - the local sequential matrix generated
4951 
4952     Level: developer
4953 
4954 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4955 
4956 @*/
4957 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4958 {
4959   PetscErrorCode ierr;
4960   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4961   Mat_SeqAIJ     *mat,*a,*b;
4962   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4963   MatScalar      *aa,*ba,*cam;
4964   PetscScalar    *ca;
4965   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4966   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4967   PetscBool      match;
4968 
4969   PetscFunctionBegin;
4970   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4971   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4972   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4973   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4974   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4975   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4976   aa = a->a; ba = b->a;
4977   if (scall == MAT_INITIAL_MATRIX) {
4978     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4979     ci[0] = 0;
4980     for (i=0; i<am; i++) {
4981       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4982     }
4983     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4984     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4985     k    = 0;
4986     for (i=0; i<am; i++) {
4987       ncols_o = bi[i+1] - bi[i];
4988       ncols_d = ai[i+1] - ai[i];
4989       /* off-diagonal portion of A */
4990       for (jo=0; jo<ncols_o; jo++) {
4991         col = cmap[*bj];
4992         if (col >= cstart) break;
4993         cj[k]   = col; bj++;
4994         ca[k++] = *ba++;
4995       }
4996       /* diagonal portion of A */
4997       for (j=0; j<ncols_d; j++) {
4998         cj[k]   = cstart + *aj++;
4999         ca[k++] = *aa++;
5000       }
5001       /* off-diagonal portion of A */
5002       for (j=jo; j<ncols_o; j++) {
5003         cj[k]   = cmap[*bj++];
5004         ca[k++] = *ba++;
5005       }
5006     }
5007     /* put together the new matrix */
5008     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5009     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5010     /* Since these are PETSc arrays, change flags to free them as necessary. */
5011     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5012     mat->free_a  = PETSC_TRUE;
5013     mat->free_ij = PETSC_TRUE;
5014     mat->nonew   = 0;
5015   } else if (scall == MAT_REUSE_MATRIX) {
5016     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5017     ci = mat->i; cj = mat->j; cam = mat->a;
5018     for (i=0; i<am; i++) {
5019       /* off-diagonal portion of A */
5020       ncols_o = bi[i+1] - bi[i];
5021       for (jo=0; jo<ncols_o; jo++) {
5022         col = cmap[*bj];
5023         if (col >= cstart) break;
5024         *cam++ = *ba++; bj++;
5025       }
5026       /* diagonal portion of A */
5027       ncols_d = ai[i+1] - ai[i];
5028       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5029       /* off-diagonal portion of A */
5030       for (j=jo; j<ncols_o; j++) {
5031         *cam++ = *ba++; bj++;
5032       }
5033     }
5034   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5035   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 
5039 #undef __FUNCT__
5040 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5041 /*@C
5042      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5043 
5044     Not Collective
5045 
5046    Input Parameters:
5047 +    A - the matrix
5048 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5049 -    row, col - index sets of rows and columns to extract (or NULL)
5050 
5051    Output Parameter:
5052 .    A_loc - the local sequential matrix generated
5053 
5054     Level: developer
5055 
5056 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5057 
5058 @*/
5059 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5060 {
5061   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5062   PetscErrorCode ierr;
5063   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5064   IS             isrowa,iscola;
5065   Mat            *aloc;
5066   PetscBool      match;
5067 
5068   PetscFunctionBegin;
5069   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5070   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5071   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5072   if (!row) {
5073     start = A->rmap->rstart; end = A->rmap->rend;
5074     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5075   } else {
5076     isrowa = *row;
5077   }
5078   if (!col) {
5079     start = A->cmap->rstart;
5080     cmap  = a->garray;
5081     nzA   = a->A->cmap->n;
5082     nzB   = a->B->cmap->n;
5083     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5084     ncols = 0;
5085     for (i=0; i<nzB; i++) {
5086       if (cmap[i] < start) idx[ncols++] = cmap[i];
5087       else break;
5088     }
5089     imark = i;
5090     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5091     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5092     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5093   } else {
5094     iscola = *col;
5095   }
5096   if (scall != MAT_INITIAL_MATRIX) {
5097     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5098     aloc[0] = *A_loc;
5099   }
5100   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5101   *A_loc = aloc[0];
5102   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5103   if (!row) {
5104     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5105   }
5106   if (!col) {
5107     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5108   }
5109   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5110   PetscFunctionReturn(0);
5111 }
5112 
5113 #undef __FUNCT__
5114 #define __FUNCT__ "MatGetBrowsOfAcols"
5115 /*@C
5116     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5117 
5118     Collective on Mat
5119 
5120    Input Parameters:
5121 +    A,B - the matrices in mpiaij format
5122 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5123 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5124 
5125    Output Parameter:
5126 +    rowb, colb - index sets of rows and columns of B to extract
5127 -    B_seq - the sequential matrix generated
5128 
5129     Level: developer
5130 
5131 @*/
5132 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5133 {
5134   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5135   PetscErrorCode ierr;
5136   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5137   IS             isrowb,iscolb;
5138   Mat            *bseq=NULL;
5139 
5140   PetscFunctionBegin;
5141   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5142     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5143   }
5144   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5145 
5146   if (scall == MAT_INITIAL_MATRIX) {
5147     start = A->cmap->rstart;
5148     cmap  = a->garray;
5149     nzA   = a->A->cmap->n;
5150     nzB   = a->B->cmap->n;
5151     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5152     ncols = 0;
5153     for (i=0; i<nzB; i++) {  /* row < local row index */
5154       if (cmap[i] < start) idx[ncols++] = cmap[i];
5155       else break;
5156     }
5157     imark = i;
5158     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5159     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5160     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5161     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5162   } else {
5163     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5164     isrowb  = *rowb; iscolb = *colb;
5165     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5166     bseq[0] = *B_seq;
5167   }
5168   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5169   *B_seq = bseq[0];
5170   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5171   if (!rowb) {
5172     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5173   } else {
5174     *rowb = isrowb;
5175   }
5176   if (!colb) {
5177     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5178   } else {
5179     *colb = iscolb;
5180   }
5181   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5182   PetscFunctionReturn(0);
5183 }
5184 
5185 #undef __FUNCT__
5186 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5187 /*
5188     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5189     of the OFF-DIAGONAL portion of local A
5190 
5191     Collective on Mat
5192 
5193    Input Parameters:
5194 +    A,B - the matrices in mpiaij format
5195 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 
5197    Output Parameter:
5198 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5199 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5200 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5201 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5202 
5203     Level: developer
5204 
5205 */
5206 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5207 {
5208   VecScatter_MPI_General *gen_to,*gen_from;
5209   PetscErrorCode         ierr;
5210   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5211   Mat_SeqAIJ             *b_oth;
5212   VecScatter             ctx =a->Mvctx;
5213   MPI_Comm               comm;
5214   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5215   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5216   PetscScalar            *rvalues,*svalues;
5217   MatScalar              *b_otha,*bufa,*bufA;
5218   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5219   MPI_Request            *rwaits = NULL,*swaits = NULL;
5220   MPI_Status             *sstatus,rstatus;
5221   PetscMPIInt            jj;
5222   PetscInt               *cols,sbs,rbs;
5223   PetscScalar            *vals;
5224 
5225   PetscFunctionBegin;
5226   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5227   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5228     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5229   }
5230   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5231   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5232 
5233   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5234   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5235   rvalues  = gen_from->values; /* holds the length of receiving row */
5236   svalues  = gen_to->values;   /* holds the length of sending row */
5237   nrecvs   = gen_from->n;
5238   nsends   = gen_to->n;
5239 
5240   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5241   srow    = gen_to->indices;    /* local row index to be sent */
5242   sstarts = gen_to->starts;
5243   sprocs  = gen_to->procs;
5244   sstatus = gen_to->sstatus;
5245   sbs     = gen_to->bs;
5246   rstarts = gen_from->starts;
5247   rprocs  = gen_from->procs;
5248   rbs     = gen_from->bs;
5249 
5250   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5251   if (scall == MAT_INITIAL_MATRIX) {
5252     /* i-array */
5253     /*---------*/
5254     /*  post receives */
5255     for (i=0; i<nrecvs; i++) {
5256       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5257       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5258       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5259     }
5260 
5261     /* pack the outgoing message */
5262     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5263 
5264     sstartsj[0] = 0;
5265     rstartsj[0] = 0;
5266     len         = 0; /* total length of j or a array to be sent */
5267     k           = 0;
5268     for (i=0; i<nsends; i++) {
5269       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5270       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5271       for (j=0; j<nrows; j++) {
5272         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5273         for (l=0; l<sbs; l++) {
5274           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5275 
5276           rowlen[j*sbs+l] = ncols;
5277 
5278           len += ncols;
5279           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5280         }
5281         k++;
5282       }
5283       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5284 
5285       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5286     }
5287     /* recvs and sends of i-array are completed */
5288     i = nrecvs;
5289     while (i--) {
5290       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5291     }
5292     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5293 
5294     /* allocate buffers for sending j and a arrays */
5295     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5296     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5297 
5298     /* create i-array of B_oth */
5299     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5300 
5301     b_othi[0] = 0;
5302     len       = 0; /* total length of j or a array to be received */
5303     k         = 0;
5304     for (i=0; i<nrecvs; i++) {
5305       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5306       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5307       for (j=0; j<nrows; j++) {
5308         b_othi[k+1] = b_othi[k] + rowlen[j];
5309         len        += rowlen[j]; k++;
5310       }
5311       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5312     }
5313 
5314     /* allocate space for j and a arrrays of B_oth */
5315     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5316     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5317 
5318     /* j-array */
5319     /*---------*/
5320     /*  post receives of j-array */
5321     for (i=0; i<nrecvs; i++) {
5322       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5323       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5324     }
5325 
5326     /* pack the outgoing message j-array */
5327     k = 0;
5328     for (i=0; i<nsends; i++) {
5329       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5330       bufJ  = bufj+sstartsj[i];
5331       for (j=0; j<nrows; j++) {
5332         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5333         for (ll=0; ll<sbs; ll++) {
5334           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5335           for (l=0; l<ncols; l++) {
5336             *bufJ++ = cols[l];
5337           }
5338           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5339         }
5340       }
5341       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5342     }
5343 
5344     /* recvs and sends of j-array are completed */
5345     i = nrecvs;
5346     while (i--) {
5347       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5348     }
5349     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5350   } else if (scall == MAT_REUSE_MATRIX) {
5351     sstartsj = *startsj_s;
5352     rstartsj = *startsj_r;
5353     bufa     = *bufa_ptr;
5354     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5355     b_otha   = b_oth->a;
5356   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5357 
5358   /* a-array */
5359   /*---------*/
5360   /*  post receives of a-array */
5361   for (i=0; i<nrecvs; i++) {
5362     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5363     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5364   }
5365 
5366   /* pack the outgoing message a-array */
5367   k = 0;
5368   for (i=0; i<nsends; i++) {
5369     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5370     bufA  = bufa+sstartsj[i];
5371     for (j=0; j<nrows; j++) {
5372       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5373       for (ll=0; ll<sbs; ll++) {
5374         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5375         for (l=0; l<ncols; l++) {
5376           *bufA++ = vals[l];
5377         }
5378         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5379       }
5380     }
5381     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5382   }
5383   /* recvs and sends of a-array are completed */
5384   i = nrecvs;
5385   while (i--) {
5386     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5387   }
5388   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5389   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5390 
5391   if (scall == MAT_INITIAL_MATRIX) {
5392     /* put together the new matrix */
5393     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5394 
5395     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5396     /* Since these are PETSc arrays, change flags to free them as necessary. */
5397     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5398     b_oth->free_a  = PETSC_TRUE;
5399     b_oth->free_ij = PETSC_TRUE;
5400     b_oth->nonew   = 0;
5401 
5402     ierr = PetscFree(bufj);CHKERRQ(ierr);
5403     if (!startsj_s || !bufa_ptr) {
5404       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5405       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5406     } else {
5407       *startsj_s = sstartsj;
5408       *startsj_r = rstartsj;
5409       *bufa_ptr  = bufa;
5410     }
5411   }
5412   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5413   PetscFunctionReturn(0);
5414 }
5415 
5416 #undef __FUNCT__
5417 #define __FUNCT__ "MatGetCommunicationStructs"
5418 /*@C
5419   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5420 
5421   Not Collective
5422 
5423   Input Parameters:
5424 . A - The matrix in mpiaij format
5425 
5426   Output Parameter:
5427 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5428 . colmap - A map from global column index to local index into lvec
5429 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5430 
5431   Level: developer
5432 
5433 @*/
5434 #if defined(PETSC_USE_CTABLE)
5435 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5436 #else
5437 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5438 #endif
5439 {
5440   Mat_MPIAIJ *a;
5441 
5442   PetscFunctionBegin;
5443   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5444   PetscValidPointer(lvec, 2);
5445   PetscValidPointer(colmap, 3);
5446   PetscValidPointer(multScatter, 4);
5447   a = (Mat_MPIAIJ*) A->data;
5448   if (lvec) *lvec = a->lvec;
5449   if (colmap) *colmap = a->colmap;
5450   if (multScatter) *multScatter = a->Mvctx;
5451   PetscFunctionReturn(0);
5452 }
5453 
5454 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5455 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5456 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5457 
5458 #undef __FUNCT__
5459 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5460 /*
5461     Computes (B'*A')' since computing B*A directly is untenable
5462 
5463                n                       p                          p
5464         (              )       (              )         (                  )
5465       m (      A       )  *  n (       B      )   =   m (         C        )
5466         (              )       (              )         (                  )
5467 
5468 */
5469 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5470 {
5471   PetscErrorCode ierr;
5472   Mat            At,Bt,Ct;
5473 
5474   PetscFunctionBegin;
5475   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5476   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5477   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5478   ierr = MatDestroy(&At);CHKERRQ(ierr);
5479   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5480   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5481   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5482   PetscFunctionReturn(0);
5483 }
5484 
5485 #undef __FUNCT__
5486 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5487 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5488 {
5489   PetscErrorCode ierr;
5490   PetscInt       m=A->rmap->n,n=B->cmap->n;
5491   Mat            Cmat;
5492 
5493   PetscFunctionBegin;
5494   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5495   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5496   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5497   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5498   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5499   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5500   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5501   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5502 
5503   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5504 
5505   *C = Cmat;
5506   PetscFunctionReturn(0);
5507 }
5508 
5509 /* ----------------------------------------------------------------*/
5510 #undef __FUNCT__
5511 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5512 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5513 {
5514   PetscErrorCode ierr;
5515 
5516   PetscFunctionBegin;
5517   if (scall == MAT_INITIAL_MATRIX) {
5518     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5519     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5520     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5521   }
5522   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5523   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5524   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5525   PetscFunctionReturn(0);
5526 }
5527 
5528 #if defined(PETSC_HAVE_MUMPS)
5529 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5530 #endif
5531 #if defined(PETSC_HAVE_PASTIX)
5532 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5533 #endif
5534 #if defined(PETSC_HAVE_SUPERLU_DIST)
5535 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5536 #endif
5537 #if defined(PETSC_HAVE_CLIQUE)
5538 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5539 #endif
5540 
5541 /*MC
5542    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5543 
5544    Options Database Keys:
5545 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5546 
5547   Level: beginner
5548 
5549 .seealso: MatCreateAIJ()
5550 M*/
5551 
5552 #undef __FUNCT__
5553 #define __FUNCT__ "MatCreate_MPIAIJ"
5554 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5555 {
5556   Mat_MPIAIJ     *b;
5557   PetscErrorCode ierr;
5558   PetscMPIInt    size;
5559 
5560   PetscFunctionBegin;
5561   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5562 
5563   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5564   B->data       = (void*)b;
5565   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5566   B->assembled  = PETSC_FALSE;
5567   B->insertmode = NOT_SET_VALUES;
5568   b->size       = size;
5569 
5570   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5571 
5572   /* build cache for off array entries formed */
5573   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5574 
5575   b->donotstash  = PETSC_FALSE;
5576   b->colmap      = 0;
5577   b->garray      = 0;
5578   b->roworiented = PETSC_TRUE;
5579 
5580   /* stuff used for matrix vector multiply */
5581   b->lvec  = NULL;
5582   b->Mvctx = NULL;
5583 
5584   /* stuff for MatGetRow() */
5585   b->rowindices   = 0;
5586   b->rowvalues    = 0;
5587   b->getrowactive = PETSC_FALSE;
5588 
5589   /* flexible pointer used in CUSP/CUSPARSE classes */
5590   b->spptr = NULL;
5591 
5592 #if defined(PETSC_HAVE_MUMPS)
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5594 #endif
5595 #if defined(PETSC_HAVE_PASTIX)
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5597 #endif
5598 #if defined(PETSC_HAVE_SUPERLU_DIST)
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5600 #endif
5601 #if defined(PETSC_HAVE_CLIQUE)
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5603 #endif
5604   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5607   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5618   PetscFunctionReturn(0);
5619 }
5620 
5621 #undef __FUNCT__
5622 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5623 /*@
5624      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5625          and "off-diagonal" part of the matrix in CSR format.
5626 
5627    Collective on MPI_Comm
5628 
5629    Input Parameters:
5630 +  comm - MPI communicator
5631 .  m - number of local rows (Cannot be PETSC_DECIDE)
5632 .  n - This value should be the same as the local size used in creating the
5633        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5634        calculated if N is given) For square matrices n is almost always m.
5635 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5636 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5637 .   i - row indices for "diagonal" portion of matrix
5638 .   j - column indices
5639 .   a - matrix values
5640 .   oi - row indices for "off-diagonal" portion of matrix
5641 .   oj - column indices
5642 -   oa - matrix values
5643 
5644    Output Parameter:
5645 .   mat - the matrix
5646 
5647    Level: advanced
5648 
5649    Notes:
5650        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5651        must free the arrays once the matrix has been destroyed and not before.
5652 
5653        The i and j indices are 0 based
5654 
5655        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5656 
5657        This sets local rows and cannot be used to set off-processor values.
5658 
5659        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5660        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5661        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5662        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5663        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5664        communication if it is known that only local entries will be set.
5665 
5666 .keywords: matrix, aij, compressed row, sparse, parallel
5667 
5668 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5669           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5670 @*/
5671 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5672 {
5673   PetscErrorCode ierr;
5674   Mat_MPIAIJ     *maij;
5675 
5676   PetscFunctionBegin;
5677   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5678   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5679   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5680   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5681   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5682   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5683   maij = (Mat_MPIAIJ*) (*mat)->data;
5684 
5685   (*mat)->preallocated = PETSC_TRUE;
5686 
5687   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5688   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5689 
5690   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5691   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5692 
5693   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5694   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5695   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5696   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5697 
5698   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5699   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5700   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5701   PetscFunctionReturn(0);
5702 }
5703 
5704 /*
5705     Special version for direct calls from Fortran
5706 */
5707 #include <petsc-private/fortranimpl.h>
5708 
5709 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5710 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5711 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5712 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5713 #endif
5714 
5715 /* Change these macros so can be used in void function */
5716 #undef CHKERRQ
5717 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5718 #undef SETERRQ2
5719 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5720 #undef SETERRQ3
5721 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5722 #undef SETERRQ
5723 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5724 
5725 #undef __FUNCT__
5726 #define __FUNCT__ "matsetvaluesmpiaij_"
5727 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5728 {
5729   Mat            mat  = *mmat;
5730   PetscInt       m    = *mm, n = *mn;
5731   InsertMode     addv = *maddv;
5732   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5733   PetscScalar    value;
5734   PetscErrorCode ierr;
5735 
5736   MatCheckPreallocated(mat,1);
5737   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5738 
5739 #if defined(PETSC_USE_DEBUG)
5740   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5741 #endif
5742   {
5743     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5744     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5745     PetscBool roworiented = aij->roworiented;
5746 
5747     /* Some Variables required in the macro */
5748     Mat        A                 = aij->A;
5749     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5750     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5751     MatScalar  *aa               = a->a;
5752     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5753     Mat        B                 = aij->B;
5754     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5755     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5756     MatScalar  *ba               = b->a;
5757 
5758     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5759     PetscInt  nonew = a->nonew;
5760     MatScalar *ap1,*ap2;
5761 
5762     PetscFunctionBegin;
5763     for (i=0; i<m; i++) {
5764       if (im[i] < 0) continue;
5765 #if defined(PETSC_USE_DEBUG)
5766       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5767 #endif
5768       if (im[i] >= rstart && im[i] < rend) {
5769         row      = im[i] - rstart;
5770         lastcol1 = -1;
5771         rp1      = aj + ai[row];
5772         ap1      = aa + ai[row];
5773         rmax1    = aimax[row];
5774         nrow1    = ailen[row];
5775         low1     = 0;
5776         high1    = nrow1;
5777         lastcol2 = -1;
5778         rp2      = bj + bi[row];
5779         ap2      = ba + bi[row];
5780         rmax2    = bimax[row];
5781         nrow2    = bilen[row];
5782         low2     = 0;
5783         high2    = nrow2;
5784 
5785         for (j=0; j<n; j++) {
5786           if (roworiented) value = v[i*n+j];
5787           else value = v[i+j*m];
5788           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5789           if (in[j] >= cstart && in[j] < cend) {
5790             col = in[j] - cstart;
5791             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5792           } else if (in[j] < 0) continue;
5793 #if defined(PETSC_USE_DEBUG)
5794           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5795 #endif
5796           else {
5797             if (mat->was_assembled) {
5798               if (!aij->colmap) {
5799                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5800               }
5801 #if defined(PETSC_USE_CTABLE)
5802               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5803               col--;
5804 #else
5805               col = aij->colmap[in[j]] - 1;
5806 #endif
5807               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5808                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5809                 col  =  in[j];
5810                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5811                 B     = aij->B;
5812                 b     = (Mat_SeqAIJ*)B->data;
5813                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5814                 rp2   = bj + bi[row];
5815                 ap2   = ba + bi[row];
5816                 rmax2 = bimax[row];
5817                 nrow2 = bilen[row];
5818                 low2  = 0;
5819                 high2 = nrow2;
5820                 bm    = aij->B->rmap->n;
5821                 ba    = b->a;
5822               }
5823             } else col = in[j];
5824             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5825           }
5826         }
5827       } else if (!aij->donotstash) {
5828         if (roworiented) {
5829           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5830         } else {
5831           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5832         }
5833       }
5834     }
5835   }
5836   PetscFunctionReturnVoid();
5837 }
5838 
5839