xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision 33d57670fcdbf57d9203d482728f549b81403a0e)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 #undef __FUNCT__
174 #define __FUNCT__ "MatDistribute_MPIAIJ"
175 /*
176     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
177     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
178 
179     Only for square matrices
180 
181     Used by a preconditioner, hence PETSC_EXTERN
182 */
183 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
184 {
185   PetscMPIInt    rank,size;
186   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
187   PetscErrorCode ierr;
188   Mat            mat;
189   Mat_SeqAIJ     *gmata;
190   PetscMPIInt    tag;
191   MPI_Status     status;
192   PetscBool      aij;
193   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
194 
195   PetscFunctionBegin;
196   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
198   if (!rank) {
199     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
200     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
201   }
202   if (reuse == MAT_INITIAL_MATRIX) {
203     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
204     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
205     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
206     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
207     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
208     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
209     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
210     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
211     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
212 
213     rowners[0] = 0;
214     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
215     rstart = rowners[rank];
216     rend   = rowners[rank+1];
217     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
218     if (!rank) {
219       gmata = (Mat_SeqAIJ*) gmat->data;
220       /* send row lengths to all processors */
221       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
222       for (i=1; i<size; i++) {
223         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
224       }
225       /* determine number diagonal and off-diagonal counts */
226       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
227       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
228       jj   = 0;
229       for (i=0; i<m; i++) {
230         for (j=0; j<dlens[i]; j++) {
231           if (gmata->j[jj] < rstart) ld[i]++;
232           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
233           jj++;
234         }
235       }
236       /* send column indices to other processes */
237       for (i=1; i<size; i++) {
238         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
239         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
240         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242 
243       /* send numerical values to other processes */
244       for (i=1; i<size; i++) {
245         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
246         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
247       }
248       gmataa = gmata->a;
249       gmataj = gmata->j;
250 
251     } else {
252       /* receive row lengths */
253       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
254       /* receive column indices */
255       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
256       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
257       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
258       /* determine number diagonal and off-diagonal counts */
259       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
260       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
261       jj   = 0;
262       for (i=0; i<m; i++) {
263         for (j=0; j<dlens[i]; j++) {
264           if (gmataj[jj] < rstart) ld[i]++;
265           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
266           jj++;
267         }
268       }
269       /* receive numerical values */
270       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
271       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
272     }
273     /* set preallocation */
274     for (i=0; i<m; i++) {
275       dlens[i] -= olens[i];
276     }
277     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
278     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
279 
280     for (i=0; i<m; i++) {
281       dlens[i] += olens[i];
282     }
283     cnt = 0;
284     for (i=0; i<m; i++) {
285       row  = rstart + i;
286       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
287       cnt += dlens[i];
288     }
289     if (rank) {
290       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
291     }
292     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
293     ierr = PetscFree(rowners);CHKERRQ(ierr);
294 
295     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
296 
297     *inmat = mat;
298   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
299     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
300     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
301     mat  = *inmat;
302     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
303     if (!rank) {
304       /* send numerical values to other processes */
305       gmata  = (Mat_SeqAIJ*) gmat->data;
306       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
307       gmataa = gmata->a;
308       for (i=1; i<size; i++) {
309         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
310         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
311       }
312       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
313     } else {
314       /* receive numerical values from process 0*/
315       nz   = Ad->nz + Ao->nz;
316       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
317       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
318     }
319     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
320     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
321     ad = Ad->a;
322     ao = Ao->a;
323     if (mat->rmap->n) {
324       i  = 0;
325       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
326       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
327     }
328     for (i=1; i<mat->rmap->n; i++) {
329       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
330       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
331     }
332     i--;
333     if (mat->rmap->n) {
334       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
335     }
336     if (rank) {
337       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
338     }
339   }
340   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
341   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
342   PetscFunctionReturn(0);
343 }
344 
345 /*
346   Local utility routine that creates a mapping from the global column
347 number to the local number in the off-diagonal part of the local
348 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
349 a slightly higher hash table cost; without it it is not scalable (each processor
350 has an order N integer array but is fast to acess.
351 */
352 #undef __FUNCT__
353 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
354 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
355 {
356   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
357   PetscErrorCode ierr;
358   PetscInt       n = aij->B->cmap->n,i;
359 
360   PetscFunctionBegin;
361   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
362 #if defined(PETSC_USE_CTABLE)
363   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
364   for (i=0; i<n; i++) {
365     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
366   }
367 #else
368   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
369   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
370   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
371 #endif
372   PetscFunctionReturn(0);
373 }
374 
375 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
376 { \
377     if (col <= lastcol1)  low1 = 0;     \
378     else                 high1 = nrow1; \
379     lastcol1 = col;\
380     while (high1-low1 > 5) { \
381       t = (low1+high1)/2; \
382       if (rp1[t] > col) high1 = t; \
383       else              low1  = t; \
384     } \
385       for (_i=low1; _i<high1; _i++) { \
386         if (rp1[_i] > col) break; \
387         if (rp1[_i] == col) { \
388           if (addv == ADD_VALUES) ap1[_i] += value;   \
389           else                    ap1[_i] = value; \
390           goto a_noinsert; \
391         } \
392       }  \
393       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
394       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
395       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
396       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
397       N = nrow1++ - 1; a->nz++; high1++; \
398       /* shift up all the later entries in this row */ \
399       for (ii=N; ii>=_i; ii--) { \
400         rp1[ii+1] = rp1[ii]; \
401         ap1[ii+1] = ap1[ii]; \
402       } \
403       rp1[_i] = col;  \
404       ap1[_i] = value;  \
405       a_noinsert: ; \
406       ailen[row] = nrow1; \
407 }
408 
409 
410 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
411   { \
412     if (col <= lastcol2) low2 = 0;                        \
413     else high2 = nrow2;                                   \
414     lastcol2 = col;                                       \
415     while (high2-low2 > 5) {                              \
416       t = (low2+high2)/2;                                 \
417       if (rp2[t] > col) high2 = t;                        \
418       else             low2  = t;                         \
419     }                                                     \
420     for (_i=low2; _i<high2; _i++) {                       \
421       if (rp2[_i] > col) break;                           \
422       if (rp2[_i] == col) {                               \
423         if (addv == ADD_VALUES) ap2[_i] += value;         \
424         else                    ap2[_i] = value;          \
425         goto b_noinsert;                                  \
426       }                                                   \
427     }                                                     \
428     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
429     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
430     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
431     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
432     N = nrow2++ - 1; b->nz++; high2++;                    \
433     /* shift up all the later entries in this row */      \
434     for (ii=N; ii>=_i; ii--) {                            \
435       rp2[ii+1] = rp2[ii];                                \
436       ap2[ii+1] = ap2[ii];                                \
437     }                                                     \
438     rp2[_i] = col;                                        \
439     ap2[_i] = value;                                      \
440     b_noinsert: ;                                         \
441     bilen[row] = nrow2;                                   \
442   }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
446 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
447 {
448   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
449   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
450   PetscErrorCode ierr;
451   PetscInt       l,*garray = mat->garray,diag;
452 
453   PetscFunctionBegin;
454   /* code only works for square matrices A */
455 
456   /* find size of row to the left of the diagonal part */
457   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
458   row  = row - diag;
459   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
460     if (garray[b->j[b->i[row]+l]] > diag) break;
461   }
462   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
463 
464   /* diagonal part */
465   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
466 
467   /* right of diagonal part */
468   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatSetValues_MPIAIJ"
474 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
475 {
476   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
477   PetscScalar    value;
478   PetscErrorCode ierr;
479   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
480   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
481   PetscBool      roworiented = aij->roworiented;
482 
483   /* Some Variables required in the macro */
484   Mat        A                 = aij->A;
485   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
486   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
487   MatScalar  *aa               = a->a;
488   PetscBool  ignorezeroentries = a->ignorezeroentries;
489   Mat        B                 = aij->B;
490   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
491   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
492   MatScalar  *ba               = b->a;
493 
494   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
495   PetscInt  nonew;
496   MatScalar *ap1,*ap2;
497 
498   PetscFunctionBegin;
499   if (v) PetscValidScalarPointer(v,6);
500   for (i=0; i<m; i++) {
501     if (im[i] < 0) continue;
502 #if defined(PETSC_USE_DEBUG)
503     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
504 #endif
505     if (im[i] >= rstart && im[i] < rend) {
506       row      = im[i] - rstart;
507       lastcol1 = -1;
508       rp1      = aj + ai[row];
509       ap1      = aa + ai[row];
510       rmax1    = aimax[row];
511       nrow1    = ailen[row];
512       low1     = 0;
513       high1    = nrow1;
514       lastcol2 = -1;
515       rp2      = bj + bi[row];
516       ap2      = ba + bi[row];
517       rmax2    = bimax[row];
518       nrow2    = bilen[row];
519       low2     = 0;
520       high2    = nrow2;
521 
522       for (j=0; j<n; j++) {
523         if (v) {
524           if (roworiented) value = v[i*n+j];
525           else             value = v[i+j*m];
526         } else value = 0.0;
527         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
528         if (in[j] >= cstart && in[j] < cend) {
529           col   = in[j] - cstart;
530           nonew = a->nonew;
531           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
532         } else if (in[j] < 0) continue;
533 #if defined(PETSC_USE_DEBUG)
534         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
535 #endif
536         else {
537           if (mat->was_assembled) {
538             if (!aij->colmap) {
539               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
540             }
541 #if defined(PETSC_USE_CTABLE)
542             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
543             col--;
544 #else
545             col = aij->colmap[in[j]] - 1;
546 #endif
547             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
548               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
549               col  =  in[j];
550               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
551               B     = aij->B;
552               b     = (Mat_SeqAIJ*)B->data;
553               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
554               rp2   = bj + bi[row];
555               ap2   = ba + bi[row];
556               rmax2 = bimax[row];
557               nrow2 = bilen[row];
558               low2  = 0;
559               high2 = nrow2;
560               bm    = aij->B->rmap->n;
561               ba    = b->a;
562             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
563           } else col = in[j];
564           nonew = b->nonew;
565           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
566         }
567       }
568     } else {
569       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
570       if (!aij->donotstash) {
571         mat->assembled = PETSC_FALSE;
572         if (roworiented) {
573           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
574         } else {
575           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
576         }
577       }
578     }
579   }
580   PetscFunctionReturn(0);
581 }
582 
583 #undef __FUNCT__
584 #define __FUNCT__ "MatGetValues_MPIAIJ"
585 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
586 {
587   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
588   PetscErrorCode ierr;
589   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
590   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
591 
592   PetscFunctionBegin;
593   for (i=0; i<m; i++) {
594     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
595     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
596     if (idxm[i] >= rstart && idxm[i] < rend) {
597       row = idxm[i] - rstart;
598       for (j=0; j<n; j++) {
599         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
600         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
601         if (idxn[j] >= cstart && idxn[j] < cend) {
602           col  = idxn[j] - cstart;
603           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
604         } else {
605           if (!aij->colmap) {
606             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
607           }
608 #if defined(PETSC_USE_CTABLE)
609           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
610           col--;
611 #else
612           col = aij->colmap[idxn[j]] - 1;
613 #endif
614           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
615           else {
616             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
617           }
618         }
619       }
620     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
629 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
630 {
631   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
632   PetscErrorCode ierr;
633   PetscInt       nstash,reallocs;
634   InsertMode     addv;
635 
636   PetscFunctionBegin;
637   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
638 
639   /* make sure all processors are either in INSERTMODE or ADDMODE */
640   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
641   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
642   mat->insertmode = addv; /* in case this processor had no cache */
643 
644   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
645   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
646   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
652 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
656   PetscErrorCode ierr;
657   PetscMPIInt    n;
658   PetscInt       i,j,rstart,ncols,flg;
659   PetscInt       *row,*col;
660   PetscBool      other_disassembled;
661   PetscScalar    *val;
662   InsertMode     addv = mat->insertmode;
663 
664   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
665 
666   PetscFunctionBegin;
667   if (!aij->donotstash && !mat->nooffprocentries) {
668     while (1) {
669       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
670       if (!flg) break;
671 
672       for (i=0; i<n; ) {
673         /* Now identify the consecutive vals belonging to the same row */
674         for (j=i,rstart=row[j]; j<n; j++) {
675           if (row[j] != rstart) break;
676         }
677         if (j < n) ncols = j-i;
678         else       ncols = n-i;
679         /* Now assemble all these values with a single function call */
680         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
681 
682         i = j;
683       }
684     }
685     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
686   }
687   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
688   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
689 
690   /* determine if any processor has disassembled, if so we must
691      also disassemble ourselfs, in order that we may reassemble. */
692   /*
693      if nonzero structure of submatrix B cannot change then we know that
694      no processor disassembled thus we can skip this stuff
695   */
696   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
697     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
698     if (mat->was_assembled && !other_disassembled) {
699       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
700     }
701   }
702   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
703     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
704   }
705   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
706   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
708 
709   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
710 
711   aij->rowvalues = 0;
712 
713   /* used by MatAXPY() */
714   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
715   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
716 
717   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
718   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
724 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
725 {
726   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
727   PetscErrorCode ierr;
728 
729   PetscFunctionBegin;
730   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
731   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatZeroRows_MPIAIJ"
737 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
738 {
739   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
740   PetscInt      *owners = A->rmap->range;
741   PetscInt       n      = A->rmap->n;
742   PetscMPIInt    size   = mat->size;
743   PetscSF        sf;
744   PetscInt      *lrows;
745   PetscSFNode   *rrows;
746   PetscInt       lastidx = -1, r, p = 0, len = 0;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750   /* Create SF where leaves are input rows and roots are owned rows */
751   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
752   for (r = 0; r < n; ++r) lrows[r] = -1;
753   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
754   for (r = 0; r < N; ++r) {
755     const PetscInt idx   = rows[r];
756     PetscBool      found = PETSC_FALSE;
757     /* Trick for efficient searching for sorted rows */
758     if (lastidx > idx) p = 0;
759     lastidx = idx;
760     for (; p < size; ++p) {
761       if (idx >= owners[p] && idx < owners[p+1]) {
762         rrows[r].rank  = p;
763         rrows[r].index = rows[r] - owners[p];
764         found = PETSC_TRUE;
765         break;
766       }
767     }
768     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
769   }
770   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
771   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
772   /* Collect flags for rows to be zeroed */
773   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
774   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
775   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
776   /* Compress and put in row numbers */
777   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
778   /* fix right hand side if needed */
779   if (x && b) {
780     const PetscScalar *xx;
781     PetscScalar       *bb;
782 
783     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
784     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
785     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
786     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
787     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
788   }
789   /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
790   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
791   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
792     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
793   } else if (diag != 0.0) {
794     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
796     for (r = 0; r < len; ++r) {
797       const PetscInt row = lrows[r] + A->rmap->rstart;
798       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
799     }
800     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
801     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
802   } else {
803     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
804   }
805   ierr = PetscFree(lrows);CHKERRQ(ierr);
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
811 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
812 {
813   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
814   PetscErrorCode    ierr;
815   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
816   PetscInt          i,j,r,m,p = 0,len = 0;
817   PetscInt          *lrows,*owners = A->rmap->range;
818   PetscSFNode       *rrows;
819   PetscSF           sf;
820   const PetscScalar *xx;
821   PetscScalar       *bb,*mask;
822   Vec               xmask,lmask;
823   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
824   const PetscInt    *aj, *ii,*ridx;
825   PetscScalar       *aa;
826 #if defined(PETSC_DEBUG)
827   PetscBool found = PETSC_FALSE;
828 #endif
829 
830   PetscFunctionBegin;
831   /* Create SF where leaves are input rows and roots are owned rows */
832   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
833   for (r = 0; r < n; ++r) lrows[r] = -1;
834   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
835   for (r = 0; r < N; ++r) {
836     const PetscInt idx   = rows[r];
837     PetscBool      found = PETSC_FALSE;
838     /* Trick for efficient searching for sorted rows */
839     if (lastidx > idx) p = 0;
840     lastidx = idx;
841     for (; p < size; ++p) {
842       if (idx >= owners[p] && idx < owners[p+1]) {
843         rrows[r].rank  = p;
844         rrows[r].index = rows[r] - owners[p];
845         found = PETSC_TRUE;
846         break;
847       }
848     }
849     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
850   }
851   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
852   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
853   /* Collect flags for rows to be zeroed */
854   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
855   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
856   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
857   /* Compress and put in row numbers */
858   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
859   /* zero diagonal part of matrix */
860   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
861   /* handle off diagonal part of matrix */
862   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
863   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
864   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
865   for (i=0; i<len; i++) bb[lrows[i]] = 1;
866   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
867   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
868   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
869   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
870   if (x) {
871     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
872     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
873     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
874     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
875   }
876   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
877   /* remove zeroed rows of off diagonal matrix */
878   ii = aij->i;
879   for (i=0; i<len; i++) {
880     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
881   }
882   /* loop over all elements of off process part of matrix zeroing removed columns*/
883   if (aij->compressedrow.use) {
884     m    = aij->compressedrow.nrows;
885     ii   = aij->compressedrow.i;
886     ridx = aij->compressedrow.rindex;
887     for (i=0; i<m; i++) {
888       n  = ii[i+1] - ii[i];
889       aj = aij->j + ii[i];
890       aa = aij->a + ii[i];
891 
892       for (j=0; j<n; j++) {
893         if (PetscAbsScalar(mask[*aj])) {
894           if (b) bb[*ridx] -= *aa*xx[*aj];
895           *aa = 0.0;
896         }
897         aa++;
898         aj++;
899       }
900       ridx++;
901     }
902   } else { /* do not use compressed row format */
903     m = l->B->rmap->n;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908       for (j=0; j<n; j++) {
909         if (PetscAbsScalar(mask[*aj])) {
910           if (b) bb[i] -= *aa*xx[*aj];
911           *aa = 0.0;
912         }
913         aa++;
914         aj++;
915       }
916     }
917   }
918   if (x) {
919     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
920     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
921   }
922   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
923   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
924   ierr = PetscFree(lrows);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMult_MPIAIJ"
930 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
931 {
932   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       nt;
935 
936   PetscFunctionBegin;
937   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
938   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
939   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
948 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
949 {
950   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
951   PetscErrorCode ierr;
952 
953   PetscFunctionBegin;
954   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 #undef __FUNCT__
959 #define __FUNCT__ "MatMultAdd_MPIAIJ"
960 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
961 {
962   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
968   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
975 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
976 {
977   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
978   PetscErrorCode ierr;
979   PetscBool      merged;
980 
981   PetscFunctionBegin;
982   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
983   /* do nondiagonal part */
984   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
985   if (!merged) {
986     /* send it on its way */
987     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
988     /* do local part */
989     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
990     /* receive remote parts: note this assumes the values are not actually */
991     /* added in yy until the next line, */
992     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993   } else {
994     /* do local part */
995     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
996     /* send it on its way */
997     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
998     /* values actually were received in the Begin() but we need to call this nop */
999     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1000   }
1001   PetscFunctionReturn(0);
1002 }
1003 
1004 #undef __FUNCT__
1005 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1006 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1007 {
1008   MPI_Comm       comm;
1009   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1010   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1011   IS             Me,Notme;
1012   PetscErrorCode ierr;
1013   PetscInt       M,N,first,last,*notme,i;
1014   PetscMPIInt    size;
1015 
1016   PetscFunctionBegin;
1017   /* Easy test: symmetric diagonal block */
1018   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1019   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1020   if (!*f) PetscFunctionReturn(0);
1021   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1023   if (size == 1) PetscFunctionReturn(0);
1024 
1025   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1026   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1027   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1028   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1029   for (i=0; i<first; i++) notme[i] = i;
1030   for (i=last; i<M; i++) notme[i-last+first] = i;
1031   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1032   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1033   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1034   Aoff = Aoffs[0];
1035   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1036   Boff = Boffs[0];
1037   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1038   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1039   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1040   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1041   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1042   ierr = PetscFree(notme);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1048 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1049 {
1050   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   /* do nondiagonal part */
1055   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1056   /* send it on its way */
1057   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1058   /* do local part */
1059   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1060   /* receive remote parts */
1061   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 /*
1066   This only works correctly for square matrices where the subblock A->A is the
1067    diagonal block
1068 */
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1071 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1072 {
1073   PetscErrorCode ierr;
1074   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1075 
1076   PetscFunctionBegin;
1077   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1078   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1079   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatScale_MPIAIJ"
1085 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1086 {
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1092   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatDestroy_MPIAIJ"
1098 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1099 {
1100   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1109   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1111 #if defined(PETSC_USE_CTABLE)
1112   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1113 #else
1114   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1118   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1119   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1120   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1121   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1122 
1123   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1137 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1138 {
1139   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1140   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1141   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1142   PetscErrorCode ierr;
1143   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1144   int            fd;
1145   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1146   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1147   PetscScalar    *column_values;
1148   PetscInt       message_count,flowcontrolcount;
1149   FILE           *file;
1150 
1151   PetscFunctionBegin;
1152   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1153   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1154   nz   = A->nz + B->nz;
1155   if (!rank) {
1156     header[0] = MAT_FILE_CLASSID;
1157     header[1] = mat->rmap->N;
1158     header[2] = mat->cmap->N;
1159 
1160     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1161     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1162     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1163     /* get largest number of rows any processor has */
1164     rlen  = mat->rmap->n;
1165     range = mat->rmap->range;
1166     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1167   } else {
1168     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1169     rlen = mat->rmap->n;
1170   }
1171 
1172   /* load up the local row counts */
1173   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1174   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1175 
1176   /* store the row lengths to the file */
1177   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1178   if (!rank) {
1179     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1180     for (i=1; i<size; i++) {
1181       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1182       rlen = range[i+1] - range[i];
1183       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1185     }
1186     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1187   } else {
1188     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1189     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1190     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1191   }
1192   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1193 
1194   /* load up the local column indices */
1195   nzmax = nz; /* th processor needs space a largest processor needs */
1196   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1197   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1198   cnt   = 0;
1199   for (i=0; i<mat->rmap->n; i++) {
1200     for (j=B->i[i]; j<B->i[i+1]; j++) {
1201       if ((col = garray[B->j[j]]) > cstart) break;
1202       column_indices[cnt++] = col;
1203     }
1204     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1205     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1206   }
1207   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1208 
1209   /* store the column indices to the file */
1210   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1211   if (!rank) {
1212     MPI_Status status;
1213     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1214     for (i=1; i<size; i++) {
1215       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1216       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1217       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1218       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1220     }
1221     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1222   } else {
1223     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1224     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1225     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1226     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1227   }
1228   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1229 
1230   /* load up the local column values */
1231   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1232   cnt  = 0;
1233   for (i=0; i<mat->rmap->n; i++) {
1234     for (j=B->i[i]; j<B->i[i+1]; j++) {
1235       if (garray[B->j[j]] > cstart) break;
1236       column_values[cnt++] = B->a[j];
1237     }
1238     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1239     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1240   }
1241   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1242 
1243   /* store the column values to the file */
1244   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1245   if (!rank) {
1246     MPI_Status status;
1247     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1248     for (i=1; i<size; i++) {
1249       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1250       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1251       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1252       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1253       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1254     }
1255     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1256   } else {
1257     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1258     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1259     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(column_values);CHKERRQ(ierr);
1263 
1264   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1265   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #include <petscdraw.h>
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1272 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1273 {
1274   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1275   PetscErrorCode    ierr;
1276   PetscMPIInt       rank = aij->rank,size = aij->size;
1277   PetscBool         isdraw,iascii,isbinary;
1278   PetscViewer       sviewer;
1279   PetscViewerFormat format;
1280 
1281   PetscFunctionBegin;
1282   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1283   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1284   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1285   if (iascii) {
1286     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1287     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1288       MatInfo   info;
1289       PetscBool inodes;
1290 
1291       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1292       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1293       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1294       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1295       if (!inodes) {
1296         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1297                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1298       } else {
1299         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1300                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1301       }
1302       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1304       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1305       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1306       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1307       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1308       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1309       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1310       PetscFunctionReturn(0);
1311     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1312       PetscInt inodecount,inodelimit,*inodes;
1313       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1314       if (inodes) {
1315         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1316       } else {
1317         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1318       }
1319       PetscFunctionReturn(0);
1320     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1321       PetscFunctionReturn(0);
1322     }
1323   } else if (isbinary) {
1324     if (size == 1) {
1325       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1326       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1327     } else {
1328       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1329     }
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1336   }
1337 
1338   if (size == 1) {
1339     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1340     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1341   } else {
1342     /* assemble the entire matrix onto first processor. */
1343     Mat        A;
1344     Mat_SeqAIJ *Aloc;
1345     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1346     MatScalar  *a;
1347 
1348     if (mat->rmap->N > 1024) {
1349       PetscBool flg = PETSC_FALSE;
1350 
1351       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1352       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1353     }
1354 
1355     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1356     if (!rank) {
1357       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1358     } else {
1359       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1360     }
1361     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1362     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1363     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1364     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1365     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1366 
1367     /* copy over the A part */
1368     Aloc = (Mat_SeqAIJ*)aij->A->data;
1369     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1370     row  = mat->rmap->rstart;
1371     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1372     for (i=0; i<m; i++) {
1373       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1374       row++;
1375       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1376     }
1377     aj = Aloc->j;
1378     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1379 
1380     /* copy over the B part */
1381     Aloc = (Mat_SeqAIJ*)aij->B->data;
1382     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1383     row  = mat->rmap->rstart;
1384     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1385     ct   = cols;
1386     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1387     for (i=0; i<m; i++) {
1388       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1389       row++;
1390       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1391     }
1392     ierr = PetscFree(ct);CHKERRQ(ierr);
1393     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395     /*
1396        Everyone has to call to draw the matrix since the graphics waits are
1397        synchronized across all processors that share the PetscDraw object
1398     */
1399     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1400     if (!rank) {
1401       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1402       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1403       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1404       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1405     }
1406     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1407     ierr = MatDestroy(&A);CHKERRQ(ierr);
1408   }
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatView_MPIAIJ"
1414 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1415 {
1416   PetscErrorCode ierr;
1417   PetscBool      iascii,isdraw,issocket,isbinary;
1418 
1419   PetscFunctionBegin;
1420   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1421   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1422   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1423   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1424   if (iascii || isdraw || isbinary || issocket) {
1425     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1426   }
1427   PetscFunctionReturn(0);
1428 }
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatSOR_MPIAIJ"
1432 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1433 {
1434   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1435   PetscErrorCode ierr;
1436   Vec            bb1 = 0;
1437   PetscBool      hasop;
1438 
1439   PetscFunctionBegin;
1440   if (flag == SOR_APPLY_UPPER) {
1441     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1442     PetscFunctionReturn(0);
1443   }
1444 
1445   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1446     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1447   }
1448 
1449   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1450     if (flag & SOR_ZERO_INITIAL_GUESS) {
1451       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1452       its--;
1453     }
1454 
1455     while (its--) {
1456       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1457       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1458 
1459       /* update rhs: bb1 = bb - B*x */
1460       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1461       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1462 
1463       /* local sweep */
1464       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1465     }
1466   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1467     if (flag & SOR_ZERO_INITIAL_GUESS) {
1468       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1469       its--;
1470     }
1471     while (its--) {
1472       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1473       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1474 
1475       /* update rhs: bb1 = bb - B*x */
1476       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1477       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1478 
1479       /* local sweep */
1480       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1481     }
1482   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1483     if (flag & SOR_ZERO_INITIAL_GUESS) {
1484       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1485       its--;
1486     }
1487     while (its--) {
1488       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1489       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1490 
1491       /* update rhs: bb1 = bb - B*x */
1492       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1494 
1495       /* local sweep */
1496       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1497     }
1498   } else if (flag & SOR_EISENSTAT) {
1499     Vec xx1;
1500 
1501     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1502     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1503 
1504     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1505     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1506     if (!mat->diag) {
1507       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1508       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1509     }
1510     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1511     if (hasop) {
1512       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1513     } else {
1514       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1515     }
1516     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1517 
1518     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1519 
1520     /* local sweep */
1521     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1522     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1523     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1524   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1525 
1526   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatPermute_MPIAIJ"
1532 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1533 {
1534   Mat            aA,aB,Aperm;
1535   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1536   PetscScalar    *aa,*ba;
1537   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1538   PetscSF        rowsf,sf;
1539   IS             parcolp = NULL;
1540   PetscBool      done;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1545   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1546   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1547   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1548 
1549   /* Invert row permutation to find out where my rows should go */
1550   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1551   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1552   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1553   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1554   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1555   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1556 
1557   /* Invert column permutation to find out where my columns should go */
1558   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1559   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1560   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1561   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1562   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1563   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1564   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1565 
1566   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1567   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1568   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1569 
1570   /* Find out where my gcols should go */
1571   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1572   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1573   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1574   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1575   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1576   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1577   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1578   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1579 
1580   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1581   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1582   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1583   for (i=0; i<m; i++) {
1584     PetscInt row = rdest[i],rowner;
1585     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1586     for (j=ai[i]; j<ai[i+1]; j++) {
1587       PetscInt cowner,col = cdest[aj[j]];
1588       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1589       if (rowner == cowner) dnnz[i]++;
1590       else onnz[i]++;
1591     }
1592     for (j=bi[i]; j<bi[i+1]; j++) {
1593       PetscInt cowner,col = gcdest[bj[j]];
1594       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1595       if (rowner == cowner) dnnz[i]++;
1596       else onnz[i]++;
1597     }
1598   }
1599   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1600   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1601   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1602   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1603   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1604 
1605   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1606   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1607   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1608   for (i=0; i<m; i++) {
1609     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1610     PetscInt rowlen;
1611     rowlen = ai[i+1] - ai[i];
1612     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1613     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1614     rowlen = bi[i+1] - bi[i];
1615     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1616     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1617   }
1618   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1619   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1620   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1621   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1622   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1623   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1624   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1625   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1626   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1627   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1628   *B = Aperm;
1629   PetscFunctionReturn(0);
1630 }
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1634 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1635 {
1636   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1637   Mat            A    = mat->A,B = mat->B;
1638   PetscErrorCode ierr;
1639   PetscReal      isend[5],irecv[5];
1640 
1641   PetscFunctionBegin;
1642   info->block_size = 1.0;
1643   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1644 
1645   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1646   isend[3] = info->memory;  isend[4] = info->mallocs;
1647 
1648   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1649 
1650   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1651   isend[3] += info->memory;  isend[4] += info->mallocs;
1652   if (flag == MAT_LOCAL) {
1653     info->nz_used      = isend[0];
1654     info->nz_allocated = isend[1];
1655     info->nz_unneeded  = isend[2];
1656     info->memory       = isend[3];
1657     info->mallocs      = isend[4];
1658   } else if (flag == MAT_GLOBAL_MAX) {
1659     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1660 
1661     info->nz_used      = irecv[0];
1662     info->nz_allocated = irecv[1];
1663     info->nz_unneeded  = irecv[2];
1664     info->memory       = irecv[3];
1665     info->mallocs      = irecv[4];
1666   } else if (flag == MAT_GLOBAL_SUM) {
1667     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1668 
1669     info->nz_used      = irecv[0];
1670     info->nz_allocated = irecv[1];
1671     info->nz_unneeded  = irecv[2];
1672     info->memory       = irecv[3];
1673     info->mallocs      = irecv[4];
1674   }
1675   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1676   info->fill_ratio_needed = 0;
1677   info->factor_mallocs    = 0;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatSetOption_MPIAIJ"
1683 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1684 {
1685   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1686   PetscErrorCode ierr;
1687 
1688   PetscFunctionBegin;
1689   switch (op) {
1690   case MAT_NEW_NONZERO_LOCATIONS:
1691   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1692   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1693   case MAT_KEEP_NONZERO_PATTERN:
1694   case MAT_NEW_NONZERO_LOCATION_ERR:
1695   case MAT_USE_INODES:
1696   case MAT_IGNORE_ZERO_ENTRIES:
1697     MatCheckPreallocated(A,1);
1698     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1699     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1700     break;
1701   case MAT_ROW_ORIENTED:
1702     a->roworiented = flg;
1703 
1704     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1705     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1706     break;
1707   case MAT_NEW_DIAGONALS:
1708     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1709     break;
1710   case MAT_IGNORE_OFF_PROC_ENTRIES:
1711     a->donotstash = flg;
1712     break;
1713   case MAT_SPD:
1714     A->spd_set = PETSC_TRUE;
1715     A->spd     = flg;
1716     if (flg) {
1717       A->symmetric                  = PETSC_TRUE;
1718       A->structurally_symmetric     = PETSC_TRUE;
1719       A->symmetric_set              = PETSC_TRUE;
1720       A->structurally_symmetric_set = PETSC_TRUE;
1721     }
1722     break;
1723   case MAT_SYMMETRIC:
1724     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1725     break;
1726   case MAT_STRUCTURALLY_SYMMETRIC:
1727     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1728     break;
1729   case MAT_HERMITIAN:
1730     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1731     break;
1732   case MAT_SYMMETRY_ETERNAL:
1733     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1734     break;
1735   default:
1736     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatGetRow_MPIAIJ"
1743 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1744 {
1745   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1746   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1747   PetscErrorCode ierr;
1748   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1749   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1750   PetscInt       *cmap,*idx_p;
1751 
1752   PetscFunctionBegin;
1753   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1754   mat->getrowactive = PETSC_TRUE;
1755 
1756   if (!mat->rowvalues && (idx || v)) {
1757     /*
1758         allocate enough space to hold information from the longest row.
1759     */
1760     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1761     PetscInt   max = 1,tmp;
1762     for (i=0; i<matin->rmap->n; i++) {
1763       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1764       if (max < tmp) max = tmp;
1765     }
1766     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1767   }
1768 
1769   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1770   lrow = row - rstart;
1771 
1772   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1773   if (!v)   {pvA = 0; pvB = 0;}
1774   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1775   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1776   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1777   nztot = nzA + nzB;
1778 
1779   cmap = mat->garray;
1780   if (v  || idx) {
1781     if (nztot) {
1782       /* Sort by increasing column numbers, assuming A and B already sorted */
1783       PetscInt imark = -1;
1784       if (v) {
1785         *v = v_p = mat->rowvalues;
1786         for (i=0; i<nzB; i++) {
1787           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1788           else break;
1789         }
1790         imark = i;
1791         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1792         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1793       }
1794       if (idx) {
1795         *idx = idx_p = mat->rowindices;
1796         if (imark > -1) {
1797           for (i=0; i<imark; i++) {
1798             idx_p[i] = cmap[cworkB[i]];
1799           }
1800         } else {
1801           for (i=0; i<nzB; i++) {
1802             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1803             else break;
1804           }
1805           imark = i;
1806         }
1807         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1808         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1809       }
1810     } else {
1811       if (idx) *idx = 0;
1812       if (v)   *v   = 0;
1813     }
1814   }
1815   *nz  = nztot;
1816   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1817   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1823 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1824 {
1825   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1826 
1827   PetscFunctionBegin;
1828   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1829   aij->getrowactive = PETSC_FALSE;
1830   PetscFunctionReturn(0);
1831 }
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatNorm_MPIAIJ"
1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1836 {
1837   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1838   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1839   PetscErrorCode ierr;
1840   PetscInt       i,j,cstart = mat->cmap->rstart;
1841   PetscReal      sum = 0.0;
1842   MatScalar      *v;
1843 
1844   PetscFunctionBegin;
1845   if (aij->size == 1) {
1846     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1847   } else {
1848     if (type == NORM_FROBENIUS) {
1849       v = amat->a;
1850       for (i=0; i<amat->nz; i++) {
1851         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1852       }
1853       v = bmat->a;
1854       for (i=0; i<bmat->nz; i++) {
1855         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1856       }
1857       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1858       *norm = PetscSqrtReal(*norm);
1859     } else if (type == NORM_1) { /* max column norm */
1860       PetscReal *tmp,*tmp2;
1861       PetscInt  *jj,*garray = aij->garray;
1862       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1863       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1864       *norm = 0.0;
1865       v     = amat->a; jj = amat->j;
1866       for (j=0; j<amat->nz; j++) {
1867         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1868       }
1869       v = bmat->a; jj = bmat->j;
1870       for (j=0; j<bmat->nz; j++) {
1871         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1872       }
1873       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1874       for (j=0; j<mat->cmap->N; j++) {
1875         if (tmp2[j] > *norm) *norm = tmp2[j];
1876       }
1877       ierr = PetscFree(tmp);CHKERRQ(ierr);
1878       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1879     } else if (type == NORM_INFINITY) { /* max row norm */
1880       PetscReal ntemp = 0.0;
1881       for (j=0; j<aij->A->rmap->n; j++) {
1882         v   = amat->a + amat->i[j];
1883         sum = 0.0;
1884         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1885           sum += PetscAbsScalar(*v); v++;
1886         }
1887         v = bmat->a + bmat->i[j];
1888         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1889           sum += PetscAbsScalar(*v); v++;
1890         }
1891         if (sum > ntemp) ntemp = sum;
1892       }
1893       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1894     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatTranspose_MPIAIJ"
1901 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1902 {
1903   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1904   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1905   PetscErrorCode ierr;
1906   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1907   PetscInt       cstart = A->cmap->rstart,ncol;
1908   Mat            B;
1909   MatScalar      *array;
1910 
1911   PetscFunctionBegin;
1912   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1913 
1914   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1915   ai = Aloc->i; aj = Aloc->j;
1916   bi = Bloc->i; bj = Bloc->j;
1917   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1918     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1919     PetscSFNode          *oloc;
1920     PETSC_UNUSED PetscSF sf;
1921 
1922     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1923     /* compute d_nnz for preallocation */
1924     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1925     for (i=0; i<ai[ma]; i++) {
1926       d_nnz[aj[i]]++;
1927       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1928     }
1929     /* compute local off-diagonal contributions */
1930     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1931     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1932     /* map those to global */
1933     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1934     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1935     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1936     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1937     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1938     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1939     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1940 
1941     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1942     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1943     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1944     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1945     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1946     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1947   } else {
1948     B    = *matout;
1949     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1950     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
1951   }
1952 
1953   /* copy over the A part */
1954   array = Aloc->a;
1955   row   = A->rmap->rstart;
1956   for (i=0; i<ma; i++) {
1957     ncol = ai[i+1]-ai[i];
1958     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1959     row++;
1960     array += ncol; aj += ncol;
1961   }
1962   aj = Aloc->j;
1963   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
1964 
1965   /* copy over the B part */
1966   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
1967   array = Bloc->a;
1968   row   = A->rmap->rstart;
1969   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1970   cols_tmp = cols;
1971   for (i=0; i<mb; i++) {
1972     ncol = bi[i+1]-bi[i];
1973     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
1974     row++;
1975     array += ncol; cols_tmp += ncol;
1976   }
1977   ierr = PetscFree(cols);CHKERRQ(ierr);
1978 
1979   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1980   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1981   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1982     *matout = B;
1983   } else {
1984     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1985   }
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
1991 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1992 {
1993   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1994   Mat            a    = aij->A,b = aij->B;
1995   PetscErrorCode ierr;
1996   PetscInt       s1,s2,s3;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2000   if (rr) {
2001     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2002     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2003     /* Overlap communication with computation. */
2004     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2005   }
2006   if (ll) {
2007     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2008     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2009     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2010   }
2011   /* scale  the diagonal block */
2012   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2013 
2014   if (rr) {
2015     /* Do a scatter end and then right scale the off-diagonal block */
2016     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2017     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2018   }
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2024 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2025 {
2026   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2027   PetscErrorCode ierr;
2028 
2029   PetscFunctionBegin;
2030   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatEqual_MPIAIJ"
2036 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2037 {
2038   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2039   Mat            a,b,c,d;
2040   PetscBool      flg;
2041   PetscErrorCode ierr;
2042 
2043   PetscFunctionBegin;
2044   a = matA->A; b = matA->B;
2045   c = matB->A; d = matB->B;
2046 
2047   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2048   if (flg) {
2049     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2050   }
2051   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatCopy_MPIAIJ"
2057 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2058 {
2059   PetscErrorCode ierr;
2060   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2061   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2062 
2063   PetscFunctionBegin;
2064   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2065   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2066     /* because of the column compression in the off-processor part of the matrix a->B,
2067        the number of columns in a->B and b->B may be different, hence we cannot call
2068        the MatCopy() directly on the two parts. If need be, we can provide a more
2069        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2070        then copying the submatrices */
2071     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2072   } else {
2073     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2074     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatSetUp_MPIAIJ"
2081 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2082 {
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2092 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2093 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2094 {
2095   PetscInt       i,m=Y->rmap->N;
2096   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2097   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2098   const PetscInt *xi = x->i,*yi = y->i;
2099 
2100   PetscFunctionBegin;
2101   /* Set the number of nonzeros in the new matrix */
2102   for (i=0; i<m; i++) {
2103     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2104     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2105     nnz[i] = 0;
2106     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2107       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2108       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2109       nnz[i]++;
2110     }
2111     for (; k<nzy; k++) nnz[i]++;
2112   }
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatAXPY_MPIAIJ"
2118 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   PetscInt       i;
2122   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2123   PetscBLASInt   bnz,one=1;
2124   Mat_SeqAIJ     *x,*y;
2125 
2126   PetscFunctionBegin;
2127   if (str == SAME_NONZERO_PATTERN) {
2128     PetscScalar alpha = a;
2129     x    = (Mat_SeqAIJ*)xx->A->data;
2130     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2131     y    = (Mat_SeqAIJ*)yy->A->data;
2132     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2133     x    = (Mat_SeqAIJ*)xx->B->data;
2134     y    = (Mat_SeqAIJ*)yy->B->data;
2135     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2136     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2137   } else if (str == SUBSET_NONZERO_PATTERN) {
2138     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2139 
2140     x = (Mat_SeqAIJ*)xx->B->data;
2141     y = (Mat_SeqAIJ*)yy->B->data;
2142     if (y->xtoy && y->XtoY != xx->B) {
2143       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2144       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2145     }
2146     if (!y->xtoy) { /* get xtoy */
2147       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2148       y->XtoY = xx->B;
2149       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2150     }
2151     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2152   } else {
2153     Mat      B;
2154     PetscInt *nnz_d,*nnz_o;
2155     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2156     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2157     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2158     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2159     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2160     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2161     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2162     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2163     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2164     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2165     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2166     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2167     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2168     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2169   }
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatConjugate_MPIAIJ"
2177 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2178 {
2179 #if defined(PETSC_USE_COMPLEX)
2180   PetscErrorCode ierr;
2181   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2182 
2183   PetscFunctionBegin;
2184   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2185   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2186 #else
2187   PetscFunctionBegin;
2188 #endif
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatRealPart_MPIAIJ"
2194 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2195 {
2196   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2197   PetscErrorCode ierr;
2198 
2199   PetscFunctionBegin;
2200   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2201   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 
2205 #undef __FUNCT__
2206 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2207 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2208 {
2209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2210   PetscErrorCode ierr;
2211 
2212   PetscFunctionBegin;
2213   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2214   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 #if defined(PETSC_HAVE_PBGL)
2219 
2220 #include <boost/parallel/mpi/bsp_process_group.hpp>
2221 #include <boost/graph/distributed/ilu_default_graph.hpp>
2222 #include <boost/graph/distributed/ilu_0_block.hpp>
2223 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2224 #include <boost/graph/distributed/petsc/interface.hpp>
2225 #include <boost/multi_array.hpp>
2226 #include <boost/parallel/distributed_property_map->hpp>
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2230 /*
2231   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2232 */
2233 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2234 {
2235   namespace petsc = boost::distributed::petsc;
2236 
2237   namespace graph_dist = boost::graph::distributed;
2238   using boost::graph::distributed::ilu_default::process_group_type;
2239   using boost::graph::ilu_permuted;
2240 
2241   PetscBool      row_identity, col_identity;
2242   PetscContainer c;
2243   PetscInt       m, n, M, N;
2244   PetscErrorCode ierr;
2245 
2246   PetscFunctionBegin;
2247   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2248   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2249   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2250   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2251 
2252   process_group_type pg;
2253   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2254   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2255   lgraph_type& level_graph = *lgraph_p;
2256   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2257 
2258   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2259   ilu_permuted(level_graph);
2260 
2261   /* put together the new matrix */
2262   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2263   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2264   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2265   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2266   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2267   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2268   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270 
2271   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2272   ierr = PetscContainerSetPointer(c, lgraph_p);
2273   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2274   ierr = PetscContainerDestroy(&c);
2275   PetscFunctionReturn(0);
2276 }
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2280 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2281 {
2282   PetscFunctionBegin;
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatSolve_MPIAIJ"
2288 /*
2289   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2290 */
2291 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2292 {
2293   namespace graph_dist = boost::graph::distributed;
2294 
2295   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2296   lgraph_type    *lgraph_p;
2297   PetscContainer c;
2298   PetscErrorCode ierr;
2299 
2300   PetscFunctionBegin;
2301   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2302   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2303   ierr = VecCopy(b, x);CHKERRQ(ierr);
2304 
2305   PetscScalar *array_x;
2306   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2307   PetscInt sx;
2308   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2309 
2310   PetscScalar *array_b;
2311   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2312   PetscInt sb;
2313   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2314 
2315   lgraph_type& level_graph = *lgraph_p;
2316   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2317 
2318   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2319   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2320   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2321 
2322   typedef boost::iterator_property_map<array_ref_type::iterator,
2323                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2324   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2325   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2326 
2327   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2328   PetscFunctionReturn(0);
2329 }
2330 #endif
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatDestroy_MatRedundant"
2334 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2335 {
2336   PetscErrorCode ierr;
2337   Mat_Redundant  *redund;
2338   PetscInt       i;
2339   PetscMPIInt    size;
2340 
2341   PetscFunctionBegin;
2342   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2343   if (size == 1) {
2344     Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
2345     redund = a->redundant;
2346   } else {
2347     Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2348     redund = a->redundant;
2349   }
2350   if (redund){
2351     if (redund->matseq) { /* via MatGetSubMatrices()  */
2352       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
2353       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
2354       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
2355       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
2356     } else {
2357       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2358       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2359       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2360       for (i=0; i<redund->nrecvs; i++) {
2361         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2362         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2363       }
2364       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2365     }
2366 
2367     if (redund->psubcomm) {
2368       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
2369     }
2370     ierr = redund->Destroy(A);CHKERRQ(ierr);
2371     ierr = PetscFree(redund);CHKERRQ(ierr);
2372   }
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2378 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2379 {
2380   PetscMPIInt    rank,size;
2381   MPI_Comm       comm;
2382   PetscErrorCode ierr;
2383   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2384   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2385   PetscInt       *rowrange = mat->rmap->range;
2386   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2387   Mat            A = aij->A,B=aij->B,C=*matredundant;
2388   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2389   PetscScalar    *sbuf_a;
2390   PetscInt       nzlocal=a->nz+b->nz;
2391   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2392   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2393   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2394   MatScalar      *aworkA,*aworkB;
2395   PetscScalar    *vals;
2396   PetscMPIInt    tag1,tag2,tag3,imdex;
2397   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2398   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2399   MPI_Status     recv_status,*send_status;
2400   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2401   PetscInt       **rbuf_j=NULL;
2402   PetscScalar    **rbuf_a=NULL;
2403   Mat_Redundant  *redund =NULL;
2404 
2405   PetscFunctionBegin;
2406   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2407   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2409   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2410   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2411 
2412   if (reuse == MAT_REUSE_MATRIX) {
2413     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2414     if (subsize == 1) {
2415       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2416       redund = c->redundant;
2417     } else {
2418       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2419       redund = c->redundant;
2420     }
2421     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2422 
2423     nsends    = redund->nsends;
2424     nrecvs    = redund->nrecvs;
2425     send_rank = redund->send_rank;
2426     recv_rank = redund->recv_rank;
2427     sbuf_nz   = redund->sbuf_nz;
2428     rbuf_nz   = redund->rbuf_nz;
2429     sbuf_j    = redund->sbuf_j;
2430     sbuf_a    = redund->sbuf_a;
2431     rbuf_j    = redund->rbuf_j;
2432     rbuf_a    = redund->rbuf_a;
2433   }
2434 
2435   if (reuse == MAT_INITIAL_MATRIX) {
2436     PetscInt    nleftover,np_subcomm;
2437 
2438     /* get the destination processors' id send_rank, nsends and nrecvs */
2439     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2440 
2441     np_subcomm = size/nsubcomm;
2442     nleftover  = size - nsubcomm*np_subcomm;
2443 
2444     /* block of codes below is specific for INTERLACED */
2445     /* ------------------------------------------------*/
2446     nsends = 0; nrecvs = 0;
2447     for (i=0; i<size; i++) {
2448       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2449         send_rank[nsends++] = i;
2450         recv_rank[nrecvs++] = i;
2451       }
2452     }
2453     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2454       i = size-nleftover-1;
2455       j = 0;
2456       while (j < nsubcomm - nleftover) {
2457         send_rank[nsends++] = i;
2458         i--; j++;
2459       }
2460     }
2461 
2462     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2463       for (i=0; i<nleftover; i++) {
2464         recv_rank[nrecvs++] = size-nleftover+i;
2465       }
2466     }
2467     /*----------------------------------------------*/
2468 
2469     /* allocate sbuf_j, sbuf_a */
2470     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2471     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2472     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2473     /*
2474     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2475     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2476      */
2477   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2478 
2479   /* copy mat's local entries into the buffers */
2480   if (reuse == MAT_INITIAL_MATRIX) {
2481     rownz_max = 0;
2482     rptr      = sbuf_j;
2483     cols      = sbuf_j + rend-rstart + 1;
2484     vals      = sbuf_a;
2485     rptr[0]   = 0;
2486     for (i=0; i<rend-rstart; i++) {
2487       row    = i + rstart;
2488       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2489       ncols  = nzA + nzB;
2490       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2491       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2492       /* load the column indices for this row into cols */
2493       lwrite = 0;
2494       for (l=0; l<nzB; l++) {
2495         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2496           vals[lwrite]   = aworkB[l];
2497           cols[lwrite++] = ctmp;
2498         }
2499       }
2500       for (l=0; l<nzA; l++) {
2501         vals[lwrite]   = aworkA[l];
2502         cols[lwrite++] = cstart + cworkA[l];
2503       }
2504       for (l=0; l<nzB; l++) {
2505         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2506           vals[lwrite]   = aworkB[l];
2507           cols[lwrite++] = ctmp;
2508         }
2509       }
2510       vals     += ncols;
2511       cols     += ncols;
2512       rptr[i+1] = rptr[i] + ncols;
2513       if (rownz_max < ncols) rownz_max = ncols;
2514     }
2515     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2516   } else { /* only copy matrix values into sbuf_a */
2517     rptr    = sbuf_j;
2518     vals    = sbuf_a;
2519     rptr[0] = 0;
2520     for (i=0; i<rend-rstart; i++) {
2521       row    = i + rstart;
2522       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2523       ncols  = nzA + nzB;
2524       cworkB = b->j + b->i[i];
2525       aworkA = a->a + a->i[i];
2526       aworkB = b->a + b->i[i];
2527       lwrite = 0;
2528       for (l=0; l<nzB; l++) {
2529         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2530       }
2531       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2532       for (l=0; l<nzB; l++) {
2533         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2534       }
2535       vals     += ncols;
2536       rptr[i+1] = rptr[i] + ncols;
2537     }
2538   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2539 
2540   /* send nzlocal to others, and recv other's nzlocal */
2541   /*--------------------------------------------------*/
2542   if (reuse == MAT_INITIAL_MATRIX) {
2543     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2544 
2545     s_waits2 = s_waits3 + nsends;
2546     s_waits1 = s_waits2 + nsends;
2547     r_waits1 = s_waits1 + nsends;
2548     r_waits2 = r_waits1 + nrecvs;
2549     r_waits3 = r_waits2 + nrecvs;
2550   } else {
2551     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2552 
2553     r_waits3 = s_waits3 + nsends;
2554   }
2555 
2556   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2557   if (reuse == MAT_INITIAL_MATRIX) {
2558     /* get new tags to keep the communication clean */
2559     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2560     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2561     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2562 
2563     /* post receives of other's nzlocal */
2564     for (i=0; i<nrecvs; i++) {
2565       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2566     }
2567     /* send nzlocal to others */
2568     for (i=0; i<nsends; i++) {
2569       sbuf_nz[i] = nzlocal;
2570       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2571     }
2572     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2573     count = nrecvs;
2574     while (count) {
2575       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2576 
2577       recv_rank[imdex] = recv_status.MPI_SOURCE;
2578       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2579       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2580 
2581       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2582 
2583       rbuf_nz[imdex] += i + 2;
2584 
2585       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2586       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2587       count--;
2588     }
2589     /* wait on sends of nzlocal */
2590     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2591     /* send mat->i,j to others, and recv from other's */
2592     /*------------------------------------------------*/
2593     for (i=0; i<nsends; i++) {
2594       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2595       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2596     }
2597     /* wait on receives of mat->i,j */
2598     /*------------------------------*/
2599     count = nrecvs;
2600     while (count) {
2601       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2602       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2603       count--;
2604     }
2605     /* wait on sends of mat->i,j */
2606     /*---------------------------*/
2607     if (nsends) {
2608       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2609     }
2610   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2611 
2612   /* post receives, send and receive mat->a */
2613   /*----------------------------------------*/
2614   for (imdex=0; imdex<nrecvs; imdex++) {
2615     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2616   }
2617   for (i=0; i<nsends; i++) {
2618     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2619   }
2620   count = nrecvs;
2621   while (count) {
2622     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2623     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2624     count--;
2625   }
2626   if (nsends) {
2627     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2628   }
2629 
2630   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2631 
2632   /* create redundant matrix */
2633   /*-------------------------*/
2634   if (reuse == MAT_INITIAL_MATRIX) {
2635     const PetscInt *range;
2636     PetscInt       rstart_sub,rend_sub,mloc_sub;
2637 
2638     /* compute rownz_max for preallocation */
2639     for (imdex=0; imdex<nrecvs; imdex++) {
2640       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2641       rptr = rbuf_j[imdex];
2642       for (i=0; i<j; i++) {
2643         ncols = rptr[i+1] - rptr[i];
2644         if (rownz_max < ncols) rownz_max = ncols;
2645       }
2646     }
2647 
2648     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2649 
2650     /* get local size of redundant matrix
2651        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2652     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2653     rstart_sub = range[nsubcomm*subrank];
2654     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2655       rend_sub = range[nsubcomm*(subrank+1)];
2656     } else {
2657       rend_sub = mat->rmap->N;
2658     }
2659     mloc_sub = rend_sub - rstart_sub;
2660 
2661     if (M == N) {
2662       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2663     } else { /* non-square matrix */
2664       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2665     }
2666     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2667     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2668     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2669     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2670   } else {
2671     C = *matredundant;
2672   }
2673 
2674   /* insert local matrix entries */
2675   rptr = sbuf_j;
2676   cols = sbuf_j + rend-rstart + 1;
2677   vals = sbuf_a;
2678   for (i=0; i<rend-rstart; i++) {
2679     row   = i + rstart;
2680     ncols = rptr[i+1] - rptr[i];
2681     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2682     vals += ncols;
2683     cols += ncols;
2684   }
2685   /* insert received matrix entries */
2686   for (imdex=0; imdex<nrecvs; imdex++) {
2687     rstart = rowrange[recv_rank[imdex]];
2688     rend   = rowrange[recv_rank[imdex]+1];
2689     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2690     rptr   = rbuf_j[imdex];
2691     cols   = rbuf_j[imdex] + rend-rstart + 1;
2692     vals   = rbuf_a[imdex];
2693     for (i=0; i<rend-rstart; i++) {
2694       row   = i + rstart;
2695       ncols = rptr[i+1] - rptr[i];
2696       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2697       vals += ncols;
2698       cols += ncols;
2699     }
2700   }
2701   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2702   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2703 
2704   if (reuse == MAT_INITIAL_MATRIX) {
2705     *matredundant = C;
2706 
2707     /* create a supporting struct and attach it to C for reuse */
2708     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2709     if (subsize == 1) {
2710       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2711       c->redundant = redund;
2712     } else {
2713       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2714       c->redundant = redund;
2715     }
2716 
2717     redund->nzlocal   = nzlocal;
2718     redund->nsends    = nsends;
2719     redund->nrecvs    = nrecvs;
2720     redund->send_rank = send_rank;
2721     redund->recv_rank = recv_rank;
2722     redund->sbuf_nz   = sbuf_nz;
2723     redund->rbuf_nz   = rbuf_nz;
2724     redund->sbuf_j    = sbuf_j;
2725     redund->sbuf_a    = sbuf_a;
2726     redund->rbuf_j    = rbuf_j;
2727     redund->rbuf_a    = rbuf_a;
2728     redund->psubcomm  = NULL;
2729 
2730     redund->Destroy = C->ops->destroy;
2731     C->ops->destroy = MatDestroy_MatRedundant;
2732   }
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2738 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2739 {
2740   PetscErrorCode ierr;
2741   MPI_Comm       comm;
2742   PetscMPIInt    size,subsize;
2743   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2744   Mat_Redundant  *redund=NULL;
2745   PetscSubcomm   psubcomm=NULL;
2746   MPI_Comm       subcomm_in=subcomm;
2747   Mat            *matseq;
2748   IS             isrow,iscol;
2749 
2750   PetscFunctionBegin;
2751   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2752     if (reuse ==  MAT_INITIAL_MATRIX) {
2753       /* create psubcomm, then get subcomm */
2754       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2755       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2756       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);
2757 
2758       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2759       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2760       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2761       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2762       subcomm = psubcomm->comm;
2763     } else { /* retrieve psubcomm and subcomm */
2764       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2765       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2766       if (subsize == 1) {
2767         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2768         redund = c->redundant;
2769       } else {
2770         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2771         redund = c->redundant;
2772       }
2773       psubcomm = redund->psubcomm;
2774     }
2775     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2776       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2777       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
2778         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2779         if (subsize == 1) {
2780           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2781           c->redundant->psubcomm = psubcomm;
2782         } else {
2783           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2784           c->redundant->psubcomm = psubcomm ;
2785         }
2786       }
2787       PetscFunctionReturn(0);
2788     }
2789   }
2790 
2791   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2792   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2793   if (reuse == MAT_INITIAL_MATRIX) {
2794     /* create a local sequential matrix matseq[0] */
2795     mloc_sub = PETSC_DECIDE;
2796     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2797     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2798     rstart = rend - mloc_sub;
2799     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2800     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2801   } else { /* reuse == MAT_REUSE_MATRIX */
2802     if (subsize == 1) {
2803       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2804       redund = c->redundant;
2805     } else {
2806       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2807       redund = c->redundant;
2808     }
2809 
2810     isrow  = redund->isrow;
2811     iscol  = redund->iscol;
2812     matseq = redund->matseq;
2813   }
2814   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2815   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2816 
2817   if (reuse == MAT_INITIAL_MATRIX) {
2818     /* create a supporting struct and attach it to C for reuse */
2819     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2820     if (subsize == 1) {
2821       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2822       c->redundant = redund;
2823     } else {
2824       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2825       c->redundant = redund;
2826     }
2827     redund->isrow    = isrow;
2828     redund->iscol    = iscol;
2829     redund->matseq   = matseq;
2830     redund->psubcomm = psubcomm;
2831     redund->Destroy               = (*matredundant)->ops->destroy;
2832     (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
2833   }
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 #undef __FUNCT__
2838 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2839 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2840 {
2841   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2842   PetscErrorCode ierr;
2843   PetscInt       i,*idxb = 0;
2844   PetscScalar    *va,*vb;
2845   Vec            vtmp;
2846 
2847   PetscFunctionBegin;
2848   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2849   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2850   if (idx) {
2851     for (i=0; i<A->rmap->n; i++) {
2852       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2853     }
2854   }
2855 
2856   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2857   if (idx) {
2858     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2859   }
2860   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2861   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2862 
2863   for (i=0; i<A->rmap->n; i++) {
2864     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2865       va[i] = vb[i];
2866       if (idx) idx[i] = a->garray[idxb[i]];
2867     }
2868   }
2869 
2870   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2871   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2872   ierr = PetscFree(idxb);CHKERRQ(ierr);
2873   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2874   PetscFunctionReturn(0);
2875 }
2876 
2877 #undef __FUNCT__
2878 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2879 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2880 {
2881   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2882   PetscErrorCode ierr;
2883   PetscInt       i,*idxb = 0;
2884   PetscScalar    *va,*vb;
2885   Vec            vtmp;
2886 
2887   PetscFunctionBegin;
2888   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2889   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2890   if (idx) {
2891     for (i=0; i<A->cmap->n; i++) {
2892       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2893     }
2894   }
2895 
2896   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2897   if (idx) {
2898     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2899   }
2900   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2901   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2902 
2903   for (i=0; i<A->rmap->n; i++) {
2904     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2905       va[i] = vb[i];
2906       if (idx) idx[i] = a->garray[idxb[i]];
2907     }
2908   }
2909 
2910   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2911   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2912   ierr = PetscFree(idxb);CHKERRQ(ierr);
2913   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2919 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2920 {
2921   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2922   PetscInt       n      = A->rmap->n;
2923   PetscInt       cstart = A->cmap->rstart;
2924   PetscInt       *cmap  = mat->garray;
2925   PetscInt       *diagIdx, *offdiagIdx;
2926   Vec            diagV, offdiagV;
2927   PetscScalar    *a, *diagA, *offdiagA;
2928   PetscInt       r;
2929   PetscErrorCode ierr;
2930 
2931   PetscFunctionBegin;
2932   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2933   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
2934   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
2935   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2936   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2937   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2938   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2939   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2940   for (r = 0; r < n; ++r) {
2941     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2942       a[r]   = diagA[r];
2943       idx[r] = cstart + diagIdx[r];
2944     } else {
2945       a[r]   = offdiagA[r];
2946       idx[r] = cmap[offdiagIdx[r]];
2947     }
2948   }
2949   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2950   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2951   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2952   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2953   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2954   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2955   PetscFunctionReturn(0);
2956 }
2957 
2958 #undef __FUNCT__
2959 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2960 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2961 {
2962   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2963   PetscInt       n      = A->rmap->n;
2964   PetscInt       cstart = A->cmap->rstart;
2965   PetscInt       *cmap  = mat->garray;
2966   PetscInt       *diagIdx, *offdiagIdx;
2967   Vec            diagV, offdiagV;
2968   PetscScalar    *a, *diagA, *offdiagA;
2969   PetscInt       r;
2970   PetscErrorCode ierr;
2971 
2972   PetscFunctionBegin;
2973   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2974   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2975   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2976   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2977   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2978   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2979   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2980   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2981   for (r = 0; r < n; ++r) {
2982     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2983       a[r]   = diagA[r];
2984       idx[r] = cstart + diagIdx[r];
2985     } else {
2986       a[r]   = offdiagA[r];
2987       idx[r] = cmap[offdiagIdx[r]];
2988     }
2989   }
2990   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2991   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2992   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2993   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2994   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2995   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3001 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3002 {
3003   PetscErrorCode ierr;
3004   Mat            *dummy;
3005 
3006   PetscFunctionBegin;
3007   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3008   *newmat = *dummy;
3009   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3010   PetscFunctionReturn(0);
3011 }
3012 
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3015 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3016 {
3017   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3018   PetscErrorCode ierr;
3019 
3020   PetscFunctionBegin;
3021   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3022   PetscFunctionReturn(0);
3023 }
3024 
3025 #undef __FUNCT__
3026 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3027 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3028 {
3029   PetscErrorCode ierr;
3030   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3031 
3032   PetscFunctionBegin;
3033   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3034   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3035   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3036   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 /* -------------------------------------------------------------------*/
3041 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3042                                        MatGetRow_MPIAIJ,
3043                                        MatRestoreRow_MPIAIJ,
3044                                        MatMult_MPIAIJ,
3045                                 /* 4*/ MatMultAdd_MPIAIJ,
3046                                        MatMultTranspose_MPIAIJ,
3047                                        MatMultTransposeAdd_MPIAIJ,
3048 #if defined(PETSC_HAVE_PBGL)
3049                                        MatSolve_MPIAIJ,
3050 #else
3051                                        0,
3052 #endif
3053                                        0,
3054                                        0,
3055                                 /*10*/ 0,
3056                                        0,
3057                                        0,
3058                                        MatSOR_MPIAIJ,
3059                                        MatTranspose_MPIAIJ,
3060                                 /*15*/ MatGetInfo_MPIAIJ,
3061                                        MatEqual_MPIAIJ,
3062                                        MatGetDiagonal_MPIAIJ,
3063                                        MatDiagonalScale_MPIAIJ,
3064                                        MatNorm_MPIAIJ,
3065                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3066                                        MatAssemblyEnd_MPIAIJ,
3067                                        MatSetOption_MPIAIJ,
3068                                        MatZeroEntries_MPIAIJ,
3069                                 /*24*/ MatZeroRows_MPIAIJ,
3070                                        0,
3071 #if defined(PETSC_HAVE_PBGL)
3072                                        0,
3073 #else
3074                                        0,
3075 #endif
3076                                        0,
3077                                        0,
3078                                 /*29*/ MatSetUp_MPIAIJ,
3079 #if defined(PETSC_HAVE_PBGL)
3080                                        0,
3081 #else
3082                                        0,
3083 #endif
3084                                        0,
3085                                        0,
3086                                        0,
3087                                 /*34*/ MatDuplicate_MPIAIJ,
3088                                        0,
3089                                        0,
3090                                        0,
3091                                        0,
3092                                 /*39*/ MatAXPY_MPIAIJ,
3093                                        MatGetSubMatrices_MPIAIJ,
3094                                        MatIncreaseOverlap_MPIAIJ,
3095                                        MatGetValues_MPIAIJ,
3096                                        MatCopy_MPIAIJ,
3097                                 /*44*/ MatGetRowMax_MPIAIJ,
3098                                        MatScale_MPIAIJ,
3099                                        0,
3100                                        0,
3101                                        MatZeroRowsColumns_MPIAIJ,
3102                                 /*49*/ MatSetRandom_MPIAIJ,
3103                                        0,
3104                                        0,
3105                                        0,
3106                                        0,
3107                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3108                                        0,
3109                                        MatSetUnfactored_MPIAIJ,
3110                                        MatPermute_MPIAIJ,
3111                                        0,
3112                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3113                                        MatDestroy_MPIAIJ,
3114                                        MatView_MPIAIJ,
3115                                        0,
3116                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3117                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3118                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3119                                        0,
3120                                        0,
3121                                        0,
3122                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3123                                        MatGetRowMinAbs_MPIAIJ,
3124                                        0,
3125                                        MatSetColoring_MPIAIJ,
3126                                        0,
3127                                        MatSetValuesAdifor_MPIAIJ,
3128                                 /*75*/ MatFDColoringApply_AIJ,
3129                                        0,
3130                                        0,
3131                                        0,
3132                                        MatFindZeroDiagonals_MPIAIJ,
3133                                 /*80*/ 0,
3134                                        0,
3135                                        0,
3136                                 /*83*/ MatLoad_MPIAIJ,
3137                                        0,
3138                                        0,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3143                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3144                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3145                                        MatPtAP_MPIAIJ_MPIAIJ,
3146                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3147                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3148                                        0,
3149                                        0,
3150                                        0,
3151                                        0,
3152                                 /*99*/ 0,
3153                                        0,
3154                                        0,
3155                                        MatConjugate_MPIAIJ,
3156                                        0,
3157                                 /*104*/MatSetValuesRow_MPIAIJ,
3158                                        MatRealPart_MPIAIJ,
3159                                        MatImaginaryPart_MPIAIJ,
3160                                        0,
3161                                        0,
3162                                 /*109*/0,
3163                                        MatGetRedundantMatrix_MPIAIJ,
3164                                        MatGetRowMin_MPIAIJ,
3165                                        0,
3166                                        0,
3167                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                        0,
3171                                        0,
3172                                 /*119*/0,
3173                                        0,
3174                                        0,
3175                                        0,
3176                                        MatGetMultiProcBlock_MPIAIJ,
3177                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3178                                        MatGetColumnNorms_MPIAIJ,
3179                                        MatInvertBlockDiagonal_MPIAIJ,
3180                                        0,
3181                                        MatGetSubMatricesParallel_MPIAIJ,
3182                                 /*129*/0,
3183                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3184                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3185                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3186                                        0,
3187                                 /*134*/0,
3188                                        0,
3189                                        0,
3190                                        0,
3191                                        0,
3192                                 /*139*/0,
3193                                        0,
3194                                        0,
3195                                        MatFDColoringSetUp_MPIXAIJ
3196 };
3197 
3198 /* ----------------------------------------------------------------------------------------*/
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3202 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3203 {
3204   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3205   PetscErrorCode ierr;
3206 
3207   PetscFunctionBegin;
3208   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3209   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3210   PetscFunctionReturn(0);
3211 }
3212 
3213 #undef __FUNCT__
3214 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3215 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3216 {
3217   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3218   PetscErrorCode ierr;
3219 
3220   PetscFunctionBegin;
3221   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3222   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3223   PetscFunctionReturn(0);
3224 }
3225 
3226 #undef __FUNCT__
3227 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3228 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3229 {
3230   Mat_MPIAIJ     *b;
3231   PetscErrorCode ierr;
3232 
3233   PetscFunctionBegin;
3234   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3235   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3236   b = (Mat_MPIAIJ*)B->data;
3237 
3238   if (!B->preallocated) {
3239     /* Explicitly create 2 MATSEQAIJ matrices. */
3240     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3241     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3242     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3243     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3244     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3245     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3246     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3247     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3248     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3249     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3250   }
3251 
3252   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3253   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3254   B->preallocated = PETSC_TRUE;
3255   PetscFunctionReturn(0);
3256 }
3257 
3258 #undef __FUNCT__
3259 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3260 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3261 {
3262   Mat            mat;
3263   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3264   PetscErrorCode ierr;
3265 
3266   PetscFunctionBegin;
3267   *newmat = 0;
3268   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3269   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3270   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3271   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3272   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3273   a       = (Mat_MPIAIJ*)mat->data;
3274 
3275   mat->factortype   = matin->factortype;
3276   mat->assembled    = PETSC_TRUE;
3277   mat->insertmode   = NOT_SET_VALUES;
3278   mat->preallocated = PETSC_TRUE;
3279 
3280   a->size         = oldmat->size;
3281   a->rank         = oldmat->rank;
3282   a->donotstash   = oldmat->donotstash;
3283   a->roworiented  = oldmat->roworiented;
3284   a->rowindices   = 0;
3285   a->rowvalues    = 0;
3286   a->getrowactive = PETSC_FALSE;
3287 
3288   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3289   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3290 
3291   if (oldmat->colmap) {
3292 #if defined(PETSC_USE_CTABLE)
3293     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3294 #else
3295     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3296     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3297     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3298 #endif
3299   } else a->colmap = 0;
3300   if (oldmat->garray) {
3301     PetscInt len;
3302     len  = oldmat->B->cmap->n;
3303     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3304     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3305     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3306   } else a->garray = 0;
3307 
3308   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3309   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3310   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3311   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3312   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3313   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3314   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3315   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3316   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3317   *newmat = mat;
3318   PetscFunctionReturn(0);
3319 }
3320 
3321 
3322 
3323 #undef __FUNCT__
3324 #define __FUNCT__ "MatLoad_MPIAIJ"
3325 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3326 {
3327   PetscScalar    *vals,*svals;
3328   MPI_Comm       comm;
3329   PetscErrorCode ierr;
3330   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3331   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3332   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3333   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3334   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3335   int            fd;
3336   PetscInt       bs = 1;
3337 
3338   PetscFunctionBegin;
3339   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3340   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3341   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3342   if (!rank) {
3343     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3344     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3345     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3346   }
3347 
3348   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3349   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3350   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3351 
3352   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3353 
3354   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3355   M    = header[1]; N = header[2];
3356   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3357   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3358   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3359 
3360   /* If global sizes are set, check if they are consistent with that given in the file */
3361   if (sizesset) {
3362     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3363   }
3364   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3365   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3366 
3367   /* determine ownership of all (block) rows */
3368   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3369   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3370   else m = newMat->rmap->n; /* Set by user */
3371 
3372   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3373   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3374 
3375   /* First process needs enough room for process with most rows */
3376   if (!rank) {
3377     mmax = rowners[1];
3378     for (i=2; i<=size; i++) {
3379       mmax = PetscMax(mmax, rowners[i]);
3380     }
3381   } else mmax = -1;             /* unused, but compilers complain */
3382 
3383   rowners[0] = 0;
3384   for (i=2; i<=size; i++) {
3385     rowners[i] += rowners[i-1];
3386   }
3387   rstart = rowners[rank];
3388   rend   = rowners[rank+1];
3389 
3390   /* distribute row lengths to all processors */
3391   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3392   if (!rank) {
3393     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3394     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3395     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3396     for (j=0; j<m; j++) {
3397       procsnz[0] += ourlens[j];
3398     }
3399     for (i=1; i<size; i++) {
3400       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3401       /* calculate the number of nonzeros on each processor */
3402       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3403         procsnz[i] += rowlengths[j];
3404       }
3405       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3406     }
3407     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3408   } else {
3409     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3410   }
3411 
3412   if (!rank) {
3413     /* determine max buffer needed and allocate it */
3414     maxnz = 0;
3415     for (i=0; i<size; i++) {
3416       maxnz = PetscMax(maxnz,procsnz[i]);
3417     }
3418     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3419 
3420     /* read in my part of the matrix column indices  */
3421     nz   = procsnz[0];
3422     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3423     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3424 
3425     /* read in every one elses and ship off */
3426     for (i=1; i<size; i++) {
3427       nz   = procsnz[i];
3428       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3429       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3430     }
3431     ierr = PetscFree(cols);CHKERRQ(ierr);
3432   } else {
3433     /* determine buffer space needed for message */
3434     nz = 0;
3435     for (i=0; i<m; i++) {
3436       nz += ourlens[i];
3437     }
3438     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3439 
3440     /* receive message of column indices*/
3441     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3442   }
3443 
3444   /* determine column ownership if matrix is not square */
3445   if (N != M) {
3446     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3447     else n = newMat->cmap->n;
3448     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3449     cstart = cend - n;
3450   } else {
3451     cstart = rstart;
3452     cend   = rend;
3453     n      = cend - cstart;
3454   }
3455 
3456   /* loop over local rows, determining number of off diagonal entries */
3457   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3458   jj   = 0;
3459   for (i=0; i<m; i++) {
3460     for (j=0; j<ourlens[i]; j++) {
3461       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3462       jj++;
3463     }
3464   }
3465 
3466   for (i=0; i<m; i++) {
3467     ourlens[i] -= offlens[i];
3468   }
3469   if (!sizesset) {
3470     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3471   }
3472 
3473   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3474 
3475   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3476 
3477   for (i=0; i<m; i++) {
3478     ourlens[i] += offlens[i];
3479   }
3480 
3481   if (!rank) {
3482     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3483 
3484     /* read in my part of the matrix numerical values  */
3485     nz   = procsnz[0];
3486     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3487 
3488     /* insert into matrix */
3489     jj      = rstart;
3490     smycols = mycols;
3491     svals   = vals;
3492     for (i=0; i<m; i++) {
3493       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3494       smycols += ourlens[i];
3495       svals   += ourlens[i];
3496       jj++;
3497     }
3498 
3499     /* read in other processors and ship out */
3500     for (i=1; i<size; i++) {
3501       nz   = procsnz[i];
3502       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3503       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3504     }
3505     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3506   } else {
3507     /* receive numeric values */
3508     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3509 
3510     /* receive message of values*/
3511     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3512 
3513     /* insert into matrix */
3514     jj      = rstart;
3515     smycols = mycols;
3516     svals   = vals;
3517     for (i=0; i<m; i++) {
3518       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3519       smycols += ourlens[i];
3520       svals   += ourlens[i];
3521       jj++;
3522     }
3523   }
3524   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3525   ierr = PetscFree(vals);CHKERRQ(ierr);
3526   ierr = PetscFree(mycols);CHKERRQ(ierr);
3527   ierr = PetscFree(rowners);CHKERRQ(ierr);
3528   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3529   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3535 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3536 {
3537   PetscErrorCode ierr;
3538   IS             iscol_local;
3539   PetscInt       csize;
3540 
3541   PetscFunctionBegin;
3542   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3543   if (call == MAT_REUSE_MATRIX) {
3544     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3545     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3546   } else {
3547     PetscInt cbs;
3548     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3549     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3550     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3551   }
3552   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3553   if (call == MAT_INITIAL_MATRIX) {
3554     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3555     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3556   }
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3561 #undef __FUNCT__
3562 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3563 /*
3564     Not great since it makes two copies of the submatrix, first an SeqAIJ
3565   in local and then by concatenating the local matrices the end result.
3566   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3567 
3568   Note: This requires a sequential iscol with all indices.
3569 */
3570 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3571 {
3572   PetscErrorCode ierr;
3573   PetscMPIInt    rank,size;
3574   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3575   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3576   PetscBool      allcolumns, colflag;
3577   Mat            M,Mreuse;
3578   MatScalar      *vwork,*aa;
3579   MPI_Comm       comm;
3580   Mat_SeqAIJ     *aij;
3581 
3582   PetscFunctionBegin;
3583   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3584   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3585   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3586 
3587   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3588   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3589   if (colflag && ncol == mat->cmap->N) {
3590     allcolumns = PETSC_TRUE;
3591   } else {
3592     allcolumns = PETSC_FALSE;
3593   }
3594   if (call ==  MAT_REUSE_MATRIX) {
3595     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3596     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3597     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3598   } else {
3599     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3600   }
3601 
3602   /*
3603       m - number of local rows
3604       n - number of columns (same on all processors)
3605       rstart - first row in new global matrix generated
3606   */
3607   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3608   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3609   if (call == MAT_INITIAL_MATRIX) {
3610     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3611     ii  = aij->i;
3612     jj  = aij->j;
3613 
3614     /*
3615         Determine the number of non-zeros in the diagonal and off-diagonal
3616         portions of the matrix in order to do correct preallocation
3617     */
3618 
3619     /* first get start and end of "diagonal" columns */
3620     if (csize == PETSC_DECIDE) {
3621       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3622       if (mglobal == n) { /* square matrix */
3623         nlocal = m;
3624       } else {
3625         nlocal = n/size + ((n % size) > rank);
3626       }
3627     } else {
3628       nlocal = csize;
3629     }
3630     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3631     rstart = rend - nlocal;
3632     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3633 
3634     /* next, compute all the lengths */
3635     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3636     olens = dlens + m;
3637     for (i=0; i<m; i++) {
3638       jend = ii[i+1] - ii[i];
3639       olen = 0;
3640       dlen = 0;
3641       for (j=0; j<jend; j++) {
3642         if (*jj < rstart || *jj >= rend) olen++;
3643         else dlen++;
3644         jj++;
3645       }
3646       olens[i] = olen;
3647       dlens[i] = dlen;
3648     }
3649     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3650     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3651     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3652     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3653     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3654     ierr = PetscFree(dlens);CHKERRQ(ierr);
3655   } else {
3656     PetscInt ml,nl;
3657 
3658     M    = *newmat;
3659     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3660     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3661     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3662     /*
3663          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3664        rather than the slower MatSetValues().
3665     */
3666     M->was_assembled = PETSC_TRUE;
3667     M->assembled     = PETSC_FALSE;
3668   }
3669   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3670   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3671   ii   = aij->i;
3672   jj   = aij->j;
3673   aa   = aij->a;
3674   for (i=0; i<m; i++) {
3675     row   = rstart + i;
3676     nz    = ii[i+1] - ii[i];
3677     cwork = jj;     jj += nz;
3678     vwork = aa;     aa += nz;
3679     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3680   }
3681 
3682   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   *newmat = M;
3685 
3686   /* save submatrix used in processor for next request */
3687   if (call ==  MAT_INITIAL_MATRIX) {
3688     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3689     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3690   }
3691   PetscFunctionReturn(0);
3692 }
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3696 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3697 {
3698   PetscInt       m,cstart, cend,j,nnz,i,d;
3699   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3700   const PetscInt *JJ;
3701   PetscScalar    *values;
3702   PetscErrorCode ierr;
3703 
3704   PetscFunctionBegin;
3705   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3706 
3707   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3708   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3709   m      = B->rmap->n;
3710   cstart = B->cmap->rstart;
3711   cend   = B->cmap->rend;
3712   rstart = B->rmap->rstart;
3713 
3714   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3715 
3716 #if defined(PETSC_USE_DEBUGGING)
3717   for (i=0; i<m; i++) {
3718     nnz = Ii[i+1]- Ii[i];
3719     JJ  = J + Ii[i];
3720     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3721     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3722     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3723   }
3724 #endif
3725 
3726   for (i=0; i<m; i++) {
3727     nnz     = Ii[i+1]- Ii[i];
3728     JJ      = J + Ii[i];
3729     nnz_max = PetscMax(nnz_max,nnz);
3730     d       = 0;
3731     for (j=0; j<nnz; j++) {
3732       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3733     }
3734     d_nnz[i] = d;
3735     o_nnz[i] = nnz - d;
3736   }
3737   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3738   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3739 
3740   if (v) values = (PetscScalar*)v;
3741   else {
3742     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3743   }
3744 
3745   for (i=0; i<m; i++) {
3746     ii   = i + rstart;
3747     nnz  = Ii[i+1]- Ii[i];
3748     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3749   }
3750   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3751   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3752 
3753   if (!v) {
3754     ierr = PetscFree(values);CHKERRQ(ierr);
3755   }
3756   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3757   PetscFunctionReturn(0);
3758 }
3759 
3760 #undef __FUNCT__
3761 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3762 /*@
3763    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3764    (the default parallel PETSc format).
3765 
3766    Collective on MPI_Comm
3767 
3768    Input Parameters:
3769 +  B - the matrix
3770 .  i - the indices into j for the start of each local row (starts with zero)
3771 .  j - the column indices for each local row (starts with zero)
3772 -  v - optional values in the matrix
3773 
3774    Level: developer
3775 
3776    Notes:
3777        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3778      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3779      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3780 
3781        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3782 
3783        The format which is used for the sparse matrix input, is equivalent to a
3784     row-major ordering.. i.e for the following matrix, the input data expected is
3785     as shown:
3786 
3787         1 0 0
3788         2 0 3     P0
3789        -------
3790         4 5 6     P1
3791 
3792      Process0 [P0]: rows_owned=[0,1]
3793         i =  {0,1,3}  [size = nrow+1  = 2+1]
3794         j =  {0,0,2}  [size = nz = 6]
3795         v =  {1,2,3}  [size = nz = 6]
3796 
3797      Process1 [P1]: rows_owned=[2]
3798         i =  {0,3}    [size = nrow+1  = 1+1]
3799         j =  {0,1,2}  [size = nz = 6]
3800         v =  {4,5,6}  [size = nz = 6]
3801 
3802 .keywords: matrix, aij, compressed row, sparse, parallel
3803 
3804 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3805           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3806 @*/
3807 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3808 {
3809   PetscErrorCode ierr;
3810 
3811   PetscFunctionBegin;
3812   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3813   PetscFunctionReturn(0);
3814 }
3815 
3816 #undef __FUNCT__
3817 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3818 /*@C
3819    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3820    (the default parallel PETSc format).  For good matrix assembly performance
3821    the user should preallocate the matrix storage by setting the parameters
3822    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3823    performance can be increased by more than a factor of 50.
3824 
3825    Collective on MPI_Comm
3826 
3827    Input Parameters:
3828 +  A - the matrix
3829 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3830            (same value is used for all local rows)
3831 .  d_nnz - array containing the number of nonzeros in the various rows of the
3832            DIAGONAL portion of the local submatrix (possibly different for each row)
3833            or NULL, if d_nz is used to specify the nonzero structure.
3834            The size of this array is equal to the number of local rows, i.e 'm'.
3835            For matrices that will be factored, you must leave room for (and set)
3836            the diagonal entry even if it is zero.
3837 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3838            submatrix (same value is used for all local rows).
3839 -  o_nnz - array containing the number of nonzeros in the various rows of the
3840            OFF-DIAGONAL portion of the local submatrix (possibly different for
3841            each row) or NULL, if o_nz is used to specify the nonzero
3842            structure. The size of this array is equal to the number
3843            of local rows, i.e 'm'.
3844 
3845    If the *_nnz parameter is given then the *_nz parameter is ignored
3846 
3847    The AIJ format (also called the Yale sparse matrix format or
3848    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3849    storage.  The stored row and column indices begin with zero.
3850    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3851 
3852    The parallel matrix is partitioned such that the first m0 rows belong to
3853    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3854    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3855 
3856    The DIAGONAL portion of the local submatrix of a processor can be defined
3857    as the submatrix which is obtained by extraction the part corresponding to
3858    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3859    first row that belongs to the processor, r2 is the last row belonging to
3860    the this processor, and c1-c2 is range of indices of the local part of a
3861    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3862    common case of a square matrix, the row and column ranges are the same and
3863    the DIAGONAL part is also square. The remaining portion of the local
3864    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3865 
3866    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3867 
3868    You can call MatGetInfo() to get information on how effective the preallocation was;
3869    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3870    You can also run with the option -info and look for messages with the string
3871    malloc in them to see if additional memory allocation was needed.
3872 
3873    Example usage:
3874 
3875    Consider the following 8x8 matrix with 34 non-zero values, that is
3876    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3877    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3878    as follows:
3879 
3880 .vb
3881             1  2  0  |  0  3  0  |  0  4
3882     Proc0   0  5  6  |  7  0  0  |  8  0
3883             9  0 10  | 11  0  0  | 12  0
3884     -------------------------------------
3885            13  0 14  | 15 16 17  |  0  0
3886     Proc1   0 18  0  | 19 20 21  |  0  0
3887             0  0  0  | 22 23  0  | 24  0
3888     -------------------------------------
3889     Proc2  25 26 27  |  0  0 28  | 29  0
3890            30  0  0  | 31 32 33  |  0 34
3891 .ve
3892 
3893    This can be represented as a collection of submatrices as:
3894 
3895 .vb
3896       A B C
3897       D E F
3898       G H I
3899 .ve
3900 
3901    Where the submatrices A,B,C are owned by proc0, D,E,F are
3902    owned by proc1, G,H,I are owned by proc2.
3903 
3904    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3905    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3906    The 'M','N' parameters are 8,8, and have the same values on all procs.
3907 
3908    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3909    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3910    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3911    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3912    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3913    matrix, ans [DF] as another SeqAIJ matrix.
3914 
3915    When d_nz, o_nz parameters are specified, d_nz storage elements are
3916    allocated for every row of the local diagonal submatrix, and o_nz
3917    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3918    One way to choose d_nz and o_nz is to use the max nonzerors per local
3919    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3920    In this case, the values of d_nz,o_nz are:
3921 .vb
3922      proc0 : dnz = 2, o_nz = 2
3923      proc1 : dnz = 3, o_nz = 2
3924      proc2 : dnz = 1, o_nz = 4
3925 .ve
3926    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3927    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3928    for proc3. i.e we are using 12+15+10=37 storage locations to store
3929    34 values.
3930 
3931    When d_nnz, o_nnz parameters are specified, the storage is specified
3932    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3933    In the above case the values for d_nnz,o_nnz are:
3934 .vb
3935      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3936      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3937      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3938 .ve
3939    Here the space allocated is sum of all the above values i.e 34, and
3940    hence pre-allocation is perfect.
3941 
3942    Level: intermediate
3943 
3944 .keywords: matrix, aij, compressed row, sparse, parallel
3945 
3946 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3947           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3948 @*/
3949 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3950 {
3951   PetscErrorCode ierr;
3952 
3953   PetscFunctionBegin;
3954   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3955   PetscValidType(B,1);
3956   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3957   PetscFunctionReturn(0);
3958 }
3959 
3960 #undef __FUNCT__
3961 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3962 /*@
3963      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3964          CSR format the local rows.
3965 
3966    Collective on MPI_Comm
3967 
3968    Input Parameters:
3969 +  comm - MPI communicator
3970 .  m - number of local rows (Cannot be PETSC_DECIDE)
3971 .  n - This value should be the same as the local size used in creating the
3972        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3973        calculated if N is given) For square matrices n is almost always m.
3974 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3975 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3976 .   i - row indices
3977 .   j - column indices
3978 -   a - matrix values
3979 
3980    Output Parameter:
3981 .   mat - the matrix
3982 
3983    Level: intermediate
3984 
3985    Notes:
3986        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3987      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3988      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3989 
3990        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3991 
3992        The format which is used for the sparse matrix input, is equivalent to a
3993     row-major ordering.. i.e for the following matrix, the input data expected is
3994     as shown:
3995 
3996         1 0 0
3997         2 0 3     P0
3998        -------
3999         4 5 6     P1
4000 
4001      Process0 [P0]: rows_owned=[0,1]
4002         i =  {0,1,3}  [size = nrow+1  = 2+1]
4003         j =  {0,0,2}  [size = nz = 6]
4004         v =  {1,2,3}  [size = nz = 6]
4005 
4006      Process1 [P1]: rows_owned=[2]
4007         i =  {0,3}    [size = nrow+1  = 1+1]
4008         j =  {0,1,2}  [size = nz = 6]
4009         v =  {4,5,6}  [size = nz = 6]
4010 
4011 .keywords: matrix, aij, compressed row, sparse, parallel
4012 
4013 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4014           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4015 @*/
4016 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4017 {
4018   PetscErrorCode ierr;
4019 
4020   PetscFunctionBegin;
4021   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4022   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4023   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4024   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4025   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4026   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4027   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4028   PetscFunctionReturn(0);
4029 }
4030 
4031 #undef __FUNCT__
4032 #define __FUNCT__ "MatCreateAIJ"
4033 /*@C
4034    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4035    (the default parallel PETSc format).  For good matrix assembly performance
4036    the user should preallocate the matrix storage by setting the parameters
4037    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4038    performance can be increased by more than a factor of 50.
4039 
4040    Collective on MPI_Comm
4041 
4042    Input Parameters:
4043 +  comm - MPI communicator
4044 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4045            This value should be the same as the local size used in creating the
4046            y vector for the matrix-vector product y = Ax.
4047 .  n - This value should be the same as the local size used in creating the
4048        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4049        calculated if N is given) For square matrices n is almost always m.
4050 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4051 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4052 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4053            (same value is used for all local rows)
4054 .  d_nnz - array containing the number of nonzeros in the various rows of the
4055            DIAGONAL portion of the local submatrix (possibly different for each row)
4056            or NULL, if d_nz is used to specify the nonzero structure.
4057            The size of this array is equal to the number of local rows, i.e 'm'.
4058 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4059            submatrix (same value is used for all local rows).
4060 -  o_nnz - array containing the number of nonzeros in the various rows of the
4061            OFF-DIAGONAL portion of the local submatrix (possibly different for
4062            each row) or NULL, if o_nz is used to specify the nonzero
4063            structure. The size of this array is equal to the number
4064            of local rows, i.e 'm'.
4065 
4066    Output Parameter:
4067 .  A - the matrix
4068 
4069    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4070    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4071    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4072 
4073    Notes:
4074    If the *_nnz parameter is given then the *_nz parameter is ignored
4075 
4076    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4077    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4078    storage requirements for this matrix.
4079 
4080    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4081    processor than it must be used on all processors that share the object for
4082    that argument.
4083 
4084    The user MUST specify either the local or global matrix dimensions
4085    (possibly both).
4086 
4087    The parallel matrix is partitioned across processors such that the
4088    first m0 rows belong to process 0, the next m1 rows belong to
4089    process 1, the next m2 rows belong to process 2 etc.. where
4090    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4091    values corresponding to [m x N] submatrix.
4092 
4093    The columns are logically partitioned with the n0 columns belonging
4094    to 0th partition, the next n1 columns belonging to the next
4095    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4096 
4097    The DIAGONAL portion of the local submatrix on any given processor
4098    is the submatrix corresponding to the rows and columns m,n
4099    corresponding to the given processor. i.e diagonal matrix on
4100    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4101    etc. The remaining portion of the local submatrix [m x (N-n)]
4102    constitute the OFF-DIAGONAL portion. The example below better
4103    illustrates this concept.
4104 
4105    For a square global matrix we define each processor's diagonal portion
4106    to be its local rows and the corresponding columns (a square submatrix);
4107    each processor's off-diagonal portion encompasses the remainder of the
4108    local matrix (a rectangular submatrix).
4109 
4110    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4111 
4112    When calling this routine with a single process communicator, a matrix of
4113    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4114    type of communicator, use the construction mechanism:
4115      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4116 
4117    By default, this format uses inodes (identical nodes) when possible.
4118    We search for consecutive rows with the same nonzero structure, thereby
4119    reusing matrix information to achieve increased efficiency.
4120 
4121    Options Database Keys:
4122 +  -mat_no_inode  - Do not use inodes
4123 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4124 -  -mat_aij_oneindex - Internally use indexing starting at 1
4125         rather than 0.  Note that when calling MatSetValues(),
4126         the user still MUST index entries starting at 0!
4127 
4128 
4129    Example usage:
4130 
4131    Consider the following 8x8 matrix with 34 non-zero values, that is
4132    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4133    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4134    as follows:
4135 
4136 .vb
4137             1  2  0  |  0  3  0  |  0  4
4138     Proc0   0  5  6  |  7  0  0  |  8  0
4139             9  0 10  | 11  0  0  | 12  0
4140     -------------------------------------
4141            13  0 14  | 15 16 17  |  0  0
4142     Proc1   0 18  0  | 19 20 21  |  0  0
4143             0  0  0  | 22 23  0  | 24  0
4144     -------------------------------------
4145     Proc2  25 26 27  |  0  0 28  | 29  0
4146            30  0  0  | 31 32 33  |  0 34
4147 .ve
4148 
4149    This can be represented as a collection of submatrices as:
4150 
4151 .vb
4152       A B C
4153       D E F
4154       G H I
4155 .ve
4156 
4157    Where the submatrices A,B,C are owned by proc0, D,E,F are
4158    owned by proc1, G,H,I are owned by proc2.
4159 
4160    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4161    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4162    The 'M','N' parameters are 8,8, and have the same values on all procs.
4163 
4164    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4165    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4166    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4167    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4168    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4169    matrix, ans [DF] as another SeqAIJ matrix.
4170 
4171    When d_nz, o_nz parameters are specified, d_nz storage elements are
4172    allocated for every row of the local diagonal submatrix, and o_nz
4173    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4174    One way to choose d_nz and o_nz is to use the max nonzerors per local
4175    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4176    In this case, the values of d_nz,o_nz are:
4177 .vb
4178      proc0 : dnz = 2, o_nz = 2
4179      proc1 : dnz = 3, o_nz = 2
4180      proc2 : dnz = 1, o_nz = 4
4181 .ve
4182    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4183    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4184    for proc3. i.e we are using 12+15+10=37 storage locations to store
4185    34 values.
4186 
4187    When d_nnz, o_nnz parameters are specified, the storage is specified
4188    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4189    In the above case the values for d_nnz,o_nnz are:
4190 .vb
4191      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4192      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4193      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4194 .ve
4195    Here the space allocated is sum of all the above values i.e 34, and
4196    hence pre-allocation is perfect.
4197 
4198    Level: intermediate
4199 
4200 .keywords: matrix, aij, compressed row, sparse, parallel
4201 
4202 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4203           MPIAIJ, MatCreateMPIAIJWithArrays()
4204 @*/
4205 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4206 {
4207   PetscErrorCode ierr;
4208   PetscMPIInt    size;
4209 
4210   PetscFunctionBegin;
4211   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4212   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4213   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4214   if (size > 1) {
4215     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4216     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4217   } else {
4218     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4219     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4220   }
4221   PetscFunctionReturn(0);
4222 }
4223 
4224 #undef __FUNCT__
4225 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4226 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4227 {
4228   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4229 
4230   PetscFunctionBegin;
4231   *Ad     = a->A;
4232   *Ao     = a->B;
4233   *colmap = a->garray;
4234   PetscFunctionReturn(0);
4235 }
4236 
4237 #undef __FUNCT__
4238 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4239 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4240 {
4241   PetscErrorCode ierr;
4242   PetscInt       i;
4243   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4244 
4245   PetscFunctionBegin;
4246   if (coloring->ctype == IS_COLORING_GLOBAL) {
4247     ISColoringValue *allcolors,*colors;
4248     ISColoring      ocoloring;
4249 
4250     /* set coloring for diagonal portion */
4251     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4252 
4253     /* set coloring for off-diagonal portion */
4254     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4255     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4256     for (i=0; i<a->B->cmap->n; i++) {
4257       colors[i] = allcolors[a->garray[i]];
4258     }
4259     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4260     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4261     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4262     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4263   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4264     ISColoringValue *colors;
4265     PetscInt        *larray;
4266     ISColoring      ocoloring;
4267 
4268     /* set coloring for diagonal portion */
4269     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4270     for (i=0; i<a->A->cmap->n; i++) {
4271       larray[i] = i + A->cmap->rstart;
4272     }
4273     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4274     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4275     for (i=0; i<a->A->cmap->n; i++) {
4276       colors[i] = coloring->colors[larray[i]];
4277     }
4278     ierr = PetscFree(larray);CHKERRQ(ierr);
4279     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4280     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4281     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4282 
4283     /* set coloring for off-diagonal portion */
4284     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4285     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4286     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4287     for (i=0; i<a->B->cmap->n; i++) {
4288       colors[i] = coloring->colors[larray[i]];
4289     }
4290     ierr = PetscFree(larray);CHKERRQ(ierr);
4291     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4292     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4293     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4294   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4295   PetscFunctionReturn(0);
4296 }
4297 
4298 #undef __FUNCT__
4299 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4300 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4301 {
4302   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4303   PetscErrorCode ierr;
4304 
4305   PetscFunctionBegin;
4306   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4307   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4308   PetscFunctionReturn(0);
4309 }
4310 
4311 #undef __FUNCT__
4312 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4313 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4314 {
4315   PetscErrorCode ierr;
4316   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4317   PetscInt       *indx;
4318 
4319   PetscFunctionBegin;
4320   /* This routine will ONLY return MPIAIJ type matrix */
4321   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4322   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4323   if (n == PETSC_DECIDE) {
4324     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4325   }
4326   /* Check sum(n) = N */
4327   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4328   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4329 
4330   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4331   rstart -= m;
4332 
4333   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4334   for (i=0; i<m; i++) {
4335     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4336     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4337     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4338   }
4339 
4340   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4341   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4342   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4343   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4344   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4345   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4346   PetscFunctionReturn(0);
4347 }
4348 
4349 #undef __FUNCT__
4350 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4351 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4352 {
4353   PetscErrorCode ierr;
4354   PetscInt       m,N,i,rstart,nnz,Ii;
4355   PetscInt       *indx;
4356   PetscScalar    *values;
4357 
4358   PetscFunctionBegin;
4359   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4360   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4361   for (i=0; i<m; i++) {
4362     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4363     Ii   = i + rstart;
4364     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4365     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4366   }
4367   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4368   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4369   PetscFunctionReturn(0);
4370 }
4371 
4372 #undef __FUNCT__
4373 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4374 /*@
4375       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4376                  matrices from each processor
4377 
4378     Collective on MPI_Comm
4379 
4380    Input Parameters:
4381 +    comm - the communicators the parallel matrix will live on
4382 .    inmat - the input sequential matrices
4383 .    n - number of local columns (or PETSC_DECIDE)
4384 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4385 
4386    Output Parameter:
4387 .    outmat - the parallel matrix generated
4388 
4389     Level: advanced
4390 
4391    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4392 
4393 @*/
4394 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4395 {
4396   PetscErrorCode ierr;
4397   PetscMPIInt    size;
4398 
4399   PetscFunctionBegin;
4400   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4401   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4402   if (size == 1) {
4403     if (scall == MAT_INITIAL_MATRIX) {
4404       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4405     } else {
4406       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4407     }
4408   } else {
4409     if (scall == MAT_INITIAL_MATRIX) {
4410       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4411     }
4412     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4413   }
4414   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4415   PetscFunctionReturn(0);
4416 }
4417 
4418 #undef __FUNCT__
4419 #define __FUNCT__ "MatFileSplit"
4420 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4421 {
4422   PetscErrorCode    ierr;
4423   PetscMPIInt       rank;
4424   PetscInt          m,N,i,rstart,nnz;
4425   size_t            len;
4426   const PetscInt    *indx;
4427   PetscViewer       out;
4428   char              *name;
4429   Mat               B;
4430   const PetscScalar *values;
4431 
4432   PetscFunctionBegin;
4433   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4434   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4435   /* Should this be the type of the diagonal block of A? */
4436   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4437   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4438   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4439   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4440   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4441   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4442   for (i=0; i<m; i++) {
4443     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4444     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4445     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4446   }
4447   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4448   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449 
4450   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4451   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4452   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4453   sprintf(name,"%s.%d",outfile,rank);
4454   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4455   ierr = PetscFree(name);CHKERRQ(ierr);
4456   ierr = MatView(B,out);CHKERRQ(ierr);
4457   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4458   ierr = MatDestroy(&B);CHKERRQ(ierr);
4459   PetscFunctionReturn(0);
4460 }
4461 
4462 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4463 #undef __FUNCT__
4464 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4465 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4466 {
4467   PetscErrorCode      ierr;
4468   Mat_Merge_SeqsToMPI *merge;
4469   PetscContainer      container;
4470 
4471   PetscFunctionBegin;
4472   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4473   if (container) {
4474     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4475     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4476     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4487     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4488     ierr = PetscFree(merge);CHKERRQ(ierr);
4489     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4490   }
4491   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4492   PetscFunctionReturn(0);
4493 }
4494 
4495 #include <../src/mat/utils/freespace.h>
4496 #include <petscbt.h>
4497 
4498 #undef __FUNCT__
4499 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4500 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4501 {
4502   PetscErrorCode      ierr;
4503   MPI_Comm            comm;
4504   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4505   PetscMPIInt         size,rank,taga,*len_s;
4506   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4507   PetscInt            proc,m;
4508   PetscInt            **buf_ri,**buf_rj;
4509   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4510   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4511   MPI_Request         *s_waits,*r_waits;
4512   MPI_Status          *status;
4513   MatScalar           *aa=a->a;
4514   MatScalar           **abuf_r,*ba_i;
4515   Mat_Merge_SeqsToMPI *merge;
4516   PetscContainer      container;
4517 
4518   PetscFunctionBegin;
4519   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4520   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4521 
4522   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4523   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4524 
4525   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4526   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4527 
4528   bi     = merge->bi;
4529   bj     = merge->bj;
4530   buf_ri = merge->buf_ri;
4531   buf_rj = merge->buf_rj;
4532 
4533   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4534   owners = merge->rowmap->range;
4535   len_s  = merge->len_s;
4536 
4537   /* send and recv matrix values */
4538   /*-----------------------------*/
4539   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4540   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4541 
4542   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4543   for (proc=0,k=0; proc<size; proc++) {
4544     if (!len_s[proc]) continue;
4545     i    = owners[proc];
4546     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4547     k++;
4548   }
4549 
4550   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4551   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4552   ierr = PetscFree(status);CHKERRQ(ierr);
4553 
4554   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4555   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4556 
4557   /* insert mat values of mpimat */
4558   /*----------------------------*/
4559   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4560   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4561 
4562   for (k=0; k<merge->nrecv; k++) {
4563     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4564     nrows       = *(buf_ri_k[k]);
4565     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4566     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4567   }
4568 
4569   /* set values of ba */
4570   m = merge->rowmap->n;
4571   for (i=0; i<m; i++) {
4572     arow = owners[rank] + i;
4573     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4574     bnzi = bi[i+1] - bi[i];
4575     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4576 
4577     /* add local non-zero vals of this proc's seqmat into ba */
4578     anzi   = ai[arow+1] - ai[arow];
4579     aj     = a->j + ai[arow];
4580     aa     = a->a + ai[arow];
4581     nextaj = 0;
4582     for (j=0; nextaj<anzi; j++) {
4583       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4584         ba_i[j] += aa[nextaj++];
4585       }
4586     }
4587 
4588     /* add received vals into ba */
4589     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4590       /* i-th row */
4591       if (i == *nextrow[k]) {
4592         anzi   = *(nextai[k]+1) - *nextai[k];
4593         aj     = buf_rj[k] + *(nextai[k]);
4594         aa     = abuf_r[k] + *(nextai[k]);
4595         nextaj = 0;
4596         for (j=0; nextaj<anzi; j++) {
4597           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4598             ba_i[j] += aa[nextaj++];
4599           }
4600         }
4601         nextrow[k]++; nextai[k]++;
4602       }
4603     }
4604     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4605   }
4606   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4607   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4608 
4609   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4610   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4611   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4612   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4613   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4614   PetscFunctionReturn(0);
4615 }
4616 
4617 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4618 
4619 #undef __FUNCT__
4620 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4621 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4622 {
4623   PetscErrorCode      ierr;
4624   Mat                 B_mpi;
4625   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4626   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4627   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4628   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4629   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4630   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4631   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4632   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4633   MPI_Status          *status;
4634   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4635   PetscBT             lnkbt;
4636   Mat_Merge_SeqsToMPI *merge;
4637   PetscContainer      container;
4638 
4639   PetscFunctionBegin;
4640   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4641 
4642   /* make sure it is a PETSc comm */
4643   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4644   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4645   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4646 
4647   ierr = PetscNew(&merge);CHKERRQ(ierr);
4648   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4649 
4650   /* determine row ownership */
4651   /*---------------------------------------------------------*/
4652   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4653   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4654   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4655   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4656   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4657   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4658   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4659 
4660   m      = merge->rowmap->n;
4661   owners = merge->rowmap->range;
4662 
4663   /* determine the number of messages to send, their lengths */
4664   /*---------------------------------------------------------*/
4665   len_s = merge->len_s;
4666 
4667   len          = 0; /* length of buf_si[] */
4668   merge->nsend = 0;
4669   for (proc=0; proc<size; proc++) {
4670     len_si[proc] = 0;
4671     if (proc == rank) {
4672       len_s[proc] = 0;
4673     } else {
4674       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4675       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4676     }
4677     if (len_s[proc]) {
4678       merge->nsend++;
4679       nrows = 0;
4680       for (i=owners[proc]; i<owners[proc+1]; i++) {
4681         if (ai[i+1] > ai[i]) nrows++;
4682       }
4683       len_si[proc] = 2*(nrows+1);
4684       len         += len_si[proc];
4685     }
4686   }
4687 
4688   /* determine the number and length of messages to receive for ij-structure */
4689   /*-------------------------------------------------------------------------*/
4690   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4691   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4692 
4693   /* post the Irecv of j-structure */
4694   /*-------------------------------*/
4695   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4696   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4697 
4698   /* post the Isend of j-structure */
4699   /*--------------------------------*/
4700   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4701 
4702   for (proc=0, k=0; proc<size; proc++) {
4703     if (!len_s[proc]) continue;
4704     i    = owners[proc];
4705     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4706     k++;
4707   }
4708 
4709   /* receives and sends of j-structure are complete */
4710   /*------------------------------------------------*/
4711   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4712   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4713 
4714   /* send and recv i-structure */
4715   /*---------------------------*/
4716   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4717   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4718 
4719   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4720   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4721   for (proc=0,k=0; proc<size; proc++) {
4722     if (!len_s[proc]) continue;
4723     /* form outgoing message for i-structure:
4724          buf_si[0]:                 nrows to be sent
4725                [1:nrows]:           row index (global)
4726                [nrows+1:2*nrows+1]: i-structure index
4727     */
4728     /*-------------------------------------------*/
4729     nrows       = len_si[proc]/2 - 1;
4730     buf_si_i    = buf_si + nrows+1;
4731     buf_si[0]   = nrows;
4732     buf_si_i[0] = 0;
4733     nrows       = 0;
4734     for (i=owners[proc]; i<owners[proc+1]; i++) {
4735       anzi = ai[i+1] - ai[i];
4736       if (anzi) {
4737         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4738         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4739         nrows++;
4740       }
4741     }
4742     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4743     k++;
4744     buf_si += len_si[proc];
4745   }
4746 
4747   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4748   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4749 
4750   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4751   for (i=0; i<merge->nrecv; i++) {
4752     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4753   }
4754 
4755   ierr = PetscFree(len_si);CHKERRQ(ierr);
4756   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4757   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4758   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4759   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4760   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4761   ierr = PetscFree(status);CHKERRQ(ierr);
4762 
4763   /* compute a local seq matrix in each processor */
4764   /*----------------------------------------------*/
4765   /* allocate bi array and free space for accumulating nonzero column info */
4766   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4767   bi[0] = 0;
4768 
4769   /* create and initialize a linked list */
4770   nlnk = N+1;
4771   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4772 
4773   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4774   len  = ai[owners[rank+1]] - ai[owners[rank]];
4775   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4776 
4777   current_space = free_space;
4778 
4779   /* determine symbolic info for each local row */
4780   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4781 
4782   for (k=0; k<merge->nrecv; k++) {
4783     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4784     nrows       = *buf_ri_k[k];
4785     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4786     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4787   }
4788 
4789   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4790   len  = 0;
4791   for (i=0; i<m; i++) {
4792     bnzi = 0;
4793     /* add local non-zero cols of this proc's seqmat into lnk */
4794     arow  = owners[rank] + i;
4795     anzi  = ai[arow+1] - ai[arow];
4796     aj    = a->j + ai[arow];
4797     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4798     bnzi += nlnk;
4799     /* add received col data into lnk */
4800     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4801       if (i == *nextrow[k]) { /* i-th row */
4802         anzi  = *(nextai[k]+1) - *nextai[k];
4803         aj    = buf_rj[k] + *nextai[k];
4804         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4805         bnzi += nlnk;
4806         nextrow[k]++; nextai[k]++;
4807       }
4808     }
4809     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4810 
4811     /* if free space is not available, make more free space */
4812     if (current_space->local_remaining<bnzi) {
4813       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4814       nspacedouble++;
4815     }
4816     /* copy data into free space, then initialize lnk */
4817     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4818     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4819 
4820     current_space->array           += bnzi;
4821     current_space->local_used      += bnzi;
4822     current_space->local_remaining -= bnzi;
4823 
4824     bi[i+1] = bi[i] + bnzi;
4825   }
4826 
4827   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4828 
4829   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4830   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4831   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4832 
4833   /* create symbolic parallel matrix B_mpi */
4834   /*---------------------------------------*/
4835   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4836   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4837   if (n==PETSC_DECIDE) {
4838     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4839   } else {
4840     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4841   }
4842   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4843   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4844   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4845   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4846   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4847 
4848   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4849   B_mpi->assembled    = PETSC_FALSE;
4850   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4851   merge->bi           = bi;
4852   merge->bj           = bj;
4853   merge->buf_ri       = buf_ri;
4854   merge->buf_rj       = buf_rj;
4855   merge->coi          = NULL;
4856   merge->coj          = NULL;
4857   merge->owners_co    = NULL;
4858 
4859   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4860 
4861   /* attach the supporting struct to B_mpi for reuse */
4862   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4863   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4864   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4865   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4866   *mpimat = B_mpi;
4867 
4868   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4869   PetscFunctionReturn(0);
4870 }
4871 
4872 #undef __FUNCT__
4873 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4874 /*@C
4875       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4876                  matrices from each processor
4877 
4878     Collective on MPI_Comm
4879 
4880    Input Parameters:
4881 +    comm - the communicators the parallel matrix will live on
4882 .    seqmat - the input sequential matrices
4883 .    m - number of local rows (or PETSC_DECIDE)
4884 .    n - number of local columns (or PETSC_DECIDE)
4885 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4886 
4887    Output Parameter:
4888 .    mpimat - the parallel matrix generated
4889 
4890     Level: advanced
4891 
4892    Notes:
4893      The dimensions of the sequential matrix in each processor MUST be the same.
4894      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4895      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4896 @*/
4897 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4898 {
4899   PetscErrorCode ierr;
4900   PetscMPIInt    size;
4901 
4902   PetscFunctionBegin;
4903   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4904   if (size == 1) {
4905     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4906     if (scall == MAT_INITIAL_MATRIX) {
4907       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4908     } else {
4909       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4910     }
4911     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4912     PetscFunctionReturn(0);
4913   }
4914   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4915   if (scall == MAT_INITIAL_MATRIX) {
4916     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4917   }
4918   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4919   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4920   PetscFunctionReturn(0);
4921 }
4922 
4923 #undef __FUNCT__
4924 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4925 /*@
4926      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
4927           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4928           with MatGetSize()
4929 
4930     Not Collective
4931 
4932    Input Parameters:
4933 +    A - the matrix
4934 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4935 
4936    Output Parameter:
4937 .    A_loc - the local sequential matrix generated
4938 
4939     Level: developer
4940 
4941 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
4942 
4943 @*/
4944 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4945 {
4946   PetscErrorCode ierr;
4947   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4948   Mat_SeqAIJ     *mat,*a,*b;
4949   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4950   MatScalar      *aa,*ba,*cam;
4951   PetscScalar    *ca;
4952   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4953   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4954   PetscBool      match;
4955 
4956   PetscFunctionBegin;
4957   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4958   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4959   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4960   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4961   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4962   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4963   aa = a->a; ba = b->a;
4964   if (scall == MAT_INITIAL_MATRIX) {
4965     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4966     ci[0] = 0;
4967     for (i=0; i<am; i++) {
4968       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4969     }
4970     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4971     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4972     k    = 0;
4973     for (i=0; i<am; i++) {
4974       ncols_o = bi[i+1] - bi[i];
4975       ncols_d = ai[i+1] - ai[i];
4976       /* off-diagonal portion of A */
4977       for (jo=0; jo<ncols_o; jo++) {
4978         col = cmap[*bj];
4979         if (col >= cstart) break;
4980         cj[k]   = col; bj++;
4981         ca[k++] = *ba++;
4982       }
4983       /* diagonal portion of A */
4984       for (j=0; j<ncols_d; j++) {
4985         cj[k]   = cstart + *aj++;
4986         ca[k++] = *aa++;
4987       }
4988       /* off-diagonal portion of A */
4989       for (j=jo; j<ncols_o; j++) {
4990         cj[k]   = cmap[*bj++];
4991         ca[k++] = *ba++;
4992       }
4993     }
4994     /* put together the new matrix */
4995     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
4996     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
4997     /* Since these are PETSc arrays, change flags to free them as necessary. */
4998     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
4999     mat->free_a  = PETSC_TRUE;
5000     mat->free_ij = PETSC_TRUE;
5001     mat->nonew   = 0;
5002   } else if (scall == MAT_REUSE_MATRIX) {
5003     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5004     ci = mat->i; cj = mat->j; cam = mat->a;
5005     for (i=0; i<am; i++) {
5006       /* off-diagonal portion of A */
5007       ncols_o = bi[i+1] - bi[i];
5008       for (jo=0; jo<ncols_o; jo++) {
5009         col = cmap[*bj];
5010         if (col >= cstart) break;
5011         *cam++ = *ba++; bj++;
5012       }
5013       /* diagonal portion of A */
5014       ncols_d = ai[i+1] - ai[i];
5015       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5016       /* off-diagonal portion of A */
5017       for (j=jo; j<ncols_o; j++) {
5018         *cam++ = *ba++; bj++;
5019       }
5020     }
5021   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5022   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5023   PetscFunctionReturn(0);
5024 }
5025 
5026 #undef __FUNCT__
5027 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5028 /*@C
5029      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5030 
5031     Not Collective
5032 
5033    Input Parameters:
5034 +    A - the matrix
5035 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5036 -    row, col - index sets of rows and columns to extract (or NULL)
5037 
5038    Output Parameter:
5039 .    A_loc - the local sequential matrix generated
5040 
5041     Level: developer
5042 
5043 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5044 
5045 @*/
5046 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5047 {
5048   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5049   PetscErrorCode ierr;
5050   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5051   IS             isrowa,iscola;
5052   Mat            *aloc;
5053   PetscBool      match;
5054 
5055   PetscFunctionBegin;
5056   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5057   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5058   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5059   if (!row) {
5060     start = A->rmap->rstart; end = A->rmap->rend;
5061     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5062   } else {
5063     isrowa = *row;
5064   }
5065   if (!col) {
5066     start = A->cmap->rstart;
5067     cmap  = a->garray;
5068     nzA   = a->A->cmap->n;
5069     nzB   = a->B->cmap->n;
5070     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5071     ncols = 0;
5072     for (i=0; i<nzB; i++) {
5073       if (cmap[i] < start) idx[ncols++] = cmap[i];
5074       else break;
5075     }
5076     imark = i;
5077     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5078     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5079     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5080   } else {
5081     iscola = *col;
5082   }
5083   if (scall != MAT_INITIAL_MATRIX) {
5084     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5085     aloc[0] = *A_loc;
5086   }
5087   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5088   *A_loc = aloc[0];
5089   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5090   if (!row) {
5091     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5092   }
5093   if (!col) {
5094     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5095   }
5096   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5097   PetscFunctionReturn(0);
5098 }
5099 
5100 #undef __FUNCT__
5101 #define __FUNCT__ "MatGetBrowsOfAcols"
5102 /*@C
5103     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5104 
5105     Collective on Mat
5106 
5107    Input Parameters:
5108 +    A,B - the matrices in mpiaij format
5109 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5110 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5111 
5112    Output Parameter:
5113 +    rowb, colb - index sets of rows and columns of B to extract
5114 -    B_seq - the sequential matrix generated
5115 
5116     Level: developer
5117 
5118 @*/
5119 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5120 {
5121   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5122   PetscErrorCode ierr;
5123   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5124   IS             isrowb,iscolb;
5125   Mat            *bseq=NULL;
5126 
5127   PetscFunctionBegin;
5128   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5129     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5130   }
5131   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5132 
5133   if (scall == MAT_INITIAL_MATRIX) {
5134     start = A->cmap->rstart;
5135     cmap  = a->garray;
5136     nzA   = a->A->cmap->n;
5137     nzB   = a->B->cmap->n;
5138     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5139     ncols = 0;
5140     for (i=0; i<nzB; i++) {  /* row < local row index */
5141       if (cmap[i] < start) idx[ncols++] = cmap[i];
5142       else break;
5143     }
5144     imark = i;
5145     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5146     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5147     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5148     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5149   } else {
5150     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5151     isrowb  = *rowb; iscolb = *colb;
5152     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5153     bseq[0] = *B_seq;
5154   }
5155   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5156   *B_seq = bseq[0];
5157   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5158   if (!rowb) {
5159     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5160   } else {
5161     *rowb = isrowb;
5162   }
5163   if (!colb) {
5164     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5165   } else {
5166     *colb = iscolb;
5167   }
5168   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5169   PetscFunctionReturn(0);
5170 }
5171 
5172 #undef __FUNCT__
5173 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5174 /*
5175     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5176     of the OFF-DIAGONAL portion of local A
5177 
5178     Collective on Mat
5179 
5180    Input Parameters:
5181 +    A,B - the matrices in mpiaij format
5182 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5183 
5184    Output Parameter:
5185 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5186 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5187 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5188 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5189 
5190     Level: developer
5191 
5192 */
5193 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5194 {
5195   VecScatter_MPI_General *gen_to,*gen_from;
5196   PetscErrorCode         ierr;
5197   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5198   Mat_SeqAIJ             *b_oth;
5199   VecScatter             ctx =a->Mvctx;
5200   MPI_Comm               comm;
5201   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5202   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5203   PetscScalar            *rvalues,*svalues;
5204   MatScalar              *b_otha,*bufa,*bufA;
5205   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5206   MPI_Request            *rwaits = NULL,*swaits = NULL;
5207   MPI_Status             *sstatus,rstatus;
5208   PetscMPIInt            jj;
5209   PetscInt               *cols,sbs,rbs;
5210   PetscScalar            *vals;
5211 
5212   PetscFunctionBegin;
5213   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5214   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5215     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5216   }
5217   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5218   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5219 
5220   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5221   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5222   rvalues  = gen_from->values; /* holds the length of receiving row */
5223   svalues  = gen_to->values;   /* holds the length of sending row */
5224   nrecvs   = gen_from->n;
5225   nsends   = gen_to->n;
5226 
5227   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5228   srow    = gen_to->indices;    /* local row index to be sent */
5229   sstarts = gen_to->starts;
5230   sprocs  = gen_to->procs;
5231   sstatus = gen_to->sstatus;
5232   sbs     = gen_to->bs;
5233   rstarts = gen_from->starts;
5234   rprocs  = gen_from->procs;
5235   rbs     = gen_from->bs;
5236 
5237   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5238   if (scall == MAT_INITIAL_MATRIX) {
5239     /* i-array */
5240     /*---------*/
5241     /*  post receives */
5242     for (i=0; i<nrecvs; i++) {
5243       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5244       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5245       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5246     }
5247 
5248     /* pack the outgoing message */
5249     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5250 
5251     sstartsj[0] = 0;
5252     rstartsj[0] = 0;
5253     len         = 0; /* total length of j or a array to be sent */
5254     k           = 0;
5255     for (i=0; i<nsends; i++) {
5256       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5257       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5258       for (j=0; j<nrows; j++) {
5259         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5260         for (l=0; l<sbs; l++) {
5261           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5262 
5263           rowlen[j*sbs+l] = ncols;
5264 
5265           len += ncols;
5266           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5267         }
5268         k++;
5269       }
5270       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5271 
5272       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5273     }
5274     /* recvs and sends of i-array are completed */
5275     i = nrecvs;
5276     while (i--) {
5277       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5278     }
5279     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5280 
5281     /* allocate buffers for sending j and a arrays */
5282     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5283     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5284 
5285     /* create i-array of B_oth */
5286     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5287 
5288     b_othi[0] = 0;
5289     len       = 0; /* total length of j or a array to be received */
5290     k         = 0;
5291     for (i=0; i<nrecvs; i++) {
5292       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5293       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5294       for (j=0; j<nrows; j++) {
5295         b_othi[k+1] = b_othi[k] + rowlen[j];
5296         len        += rowlen[j]; k++;
5297       }
5298       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5299     }
5300 
5301     /* allocate space for j and a arrrays of B_oth */
5302     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5303     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5304 
5305     /* j-array */
5306     /*---------*/
5307     /*  post receives of j-array */
5308     for (i=0; i<nrecvs; i++) {
5309       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5310       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5311     }
5312 
5313     /* pack the outgoing message j-array */
5314     k = 0;
5315     for (i=0; i<nsends; i++) {
5316       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5317       bufJ  = bufj+sstartsj[i];
5318       for (j=0; j<nrows; j++) {
5319         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5320         for (ll=0; ll<sbs; ll++) {
5321           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5322           for (l=0; l<ncols; l++) {
5323             *bufJ++ = cols[l];
5324           }
5325           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5326         }
5327       }
5328       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5329     }
5330 
5331     /* recvs and sends of j-array are completed */
5332     i = nrecvs;
5333     while (i--) {
5334       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5335     }
5336     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5337   } else if (scall == MAT_REUSE_MATRIX) {
5338     sstartsj = *startsj_s;
5339     rstartsj = *startsj_r;
5340     bufa     = *bufa_ptr;
5341     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5342     b_otha   = b_oth->a;
5343   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5344 
5345   /* a-array */
5346   /*---------*/
5347   /*  post receives of a-array */
5348   for (i=0; i<nrecvs; i++) {
5349     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5350     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5351   }
5352 
5353   /* pack the outgoing message a-array */
5354   k = 0;
5355   for (i=0; i<nsends; i++) {
5356     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5357     bufA  = bufa+sstartsj[i];
5358     for (j=0; j<nrows; j++) {
5359       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5360       for (ll=0; ll<sbs; ll++) {
5361         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5362         for (l=0; l<ncols; l++) {
5363           *bufA++ = vals[l];
5364         }
5365         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5366       }
5367     }
5368     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5369   }
5370   /* recvs and sends of a-array are completed */
5371   i = nrecvs;
5372   while (i--) {
5373     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5374   }
5375   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5376   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5377 
5378   if (scall == MAT_INITIAL_MATRIX) {
5379     /* put together the new matrix */
5380     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5381 
5382     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5383     /* Since these are PETSc arrays, change flags to free them as necessary. */
5384     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5385     b_oth->free_a  = PETSC_TRUE;
5386     b_oth->free_ij = PETSC_TRUE;
5387     b_oth->nonew   = 0;
5388 
5389     ierr = PetscFree(bufj);CHKERRQ(ierr);
5390     if (!startsj_s || !bufa_ptr) {
5391       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5392       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5393     } else {
5394       *startsj_s = sstartsj;
5395       *startsj_r = rstartsj;
5396       *bufa_ptr  = bufa;
5397     }
5398   }
5399   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5400   PetscFunctionReturn(0);
5401 }
5402 
5403 #undef __FUNCT__
5404 #define __FUNCT__ "MatGetCommunicationStructs"
5405 /*@C
5406   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5407 
5408   Not Collective
5409 
5410   Input Parameters:
5411 . A - The matrix in mpiaij format
5412 
5413   Output Parameter:
5414 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5415 . colmap - A map from global column index to local index into lvec
5416 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5417 
5418   Level: developer
5419 
5420 @*/
5421 #if defined(PETSC_USE_CTABLE)
5422 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5423 #else
5424 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5425 #endif
5426 {
5427   Mat_MPIAIJ *a;
5428 
5429   PetscFunctionBegin;
5430   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5431   PetscValidPointer(lvec, 2);
5432   PetscValidPointer(colmap, 3);
5433   PetscValidPointer(multScatter, 4);
5434   a = (Mat_MPIAIJ*) A->data;
5435   if (lvec) *lvec = a->lvec;
5436   if (colmap) *colmap = a->colmap;
5437   if (multScatter) *multScatter = a->Mvctx;
5438   PetscFunctionReturn(0);
5439 }
5440 
5441 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5442 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5443 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5444 
5445 #undef __FUNCT__
5446 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5447 /*
5448     Computes (B'*A')' since computing B*A directly is untenable
5449 
5450                n                       p                          p
5451         (              )       (              )         (                  )
5452       m (      A       )  *  n (       B      )   =   m (         C        )
5453         (              )       (              )         (                  )
5454 
5455 */
5456 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5457 {
5458   PetscErrorCode ierr;
5459   Mat            At,Bt,Ct;
5460 
5461   PetscFunctionBegin;
5462   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5463   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5464   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5465   ierr = MatDestroy(&At);CHKERRQ(ierr);
5466   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5467   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5468   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5469   PetscFunctionReturn(0);
5470 }
5471 
5472 #undef __FUNCT__
5473 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5474 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5475 {
5476   PetscErrorCode ierr;
5477   PetscInt       m=A->rmap->n,n=B->cmap->n;
5478   Mat            Cmat;
5479 
5480   PetscFunctionBegin;
5481   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5482   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5483   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5484   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5485   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5486   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5487   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5488   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5489 
5490   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5491 
5492   *C = Cmat;
5493   PetscFunctionReturn(0);
5494 }
5495 
5496 /* ----------------------------------------------------------------*/
5497 #undef __FUNCT__
5498 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5499 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5500 {
5501   PetscErrorCode ierr;
5502 
5503   PetscFunctionBegin;
5504   if (scall == MAT_INITIAL_MATRIX) {
5505     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5506     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5507     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5508   }
5509   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5510   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5511   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 #if defined(PETSC_HAVE_MUMPS)
5516 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5517 #endif
5518 #if defined(PETSC_HAVE_PASTIX)
5519 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5520 #endif
5521 #if defined(PETSC_HAVE_SUPERLU_DIST)
5522 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5523 #endif
5524 #if defined(PETSC_HAVE_CLIQUE)
5525 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5526 #endif
5527 
5528 /*MC
5529    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5530 
5531    Options Database Keys:
5532 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5533 
5534   Level: beginner
5535 
5536 .seealso: MatCreateAIJ()
5537 M*/
5538 
5539 #undef __FUNCT__
5540 #define __FUNCT__ "MatCreate_MPIAIJ"
5541 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5542 {
5543   Mat_MPIAIJ     *b;
5544   PetscErrorCode ierr;
5545   PetscMPIInt    size;
5546 
5547   PetscFunctionBegin;
5548   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5549 
5550   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5551   B->data       = (void*)b;
5552   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5553   B->assembled  = PETSC_FALSE;
5554   B->insertmode = NOT_SET_VALUES;
5555   b->size       = size;
5556 
5557   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5558 
5559   /* build cache for off array entries formed */
5560   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5561 
5562   b->donotstash  = PETSC_FALSE;
5563   b->colmap      = 0;
5564   b->garray      = 0;
5565   b->roworiented = PETSC_TRUE;
5566 
5567   /* stuff used for matrix vector multiply */
5568   b->lvec  = NULL;
5569   b->Mvctx = NULL;
5570 
5571   /* stuff for MatGetRow() */
5572   b->rowindices   = 0;
5573   b->rowvalues    = 0;
5574   b->getrowactive = PETSC_FALSE;
5575 
5576   /* flexible pointer used in CUSP/CUSPARSE classes */
5577   b->spptr = NULL;
5578 
5579 #if defined(PETSC_HAVE_MUMPS)
5580   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5581 #endif
5582 #if defined(PETSC_HAVE_PASTIX)
5583   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5584 #endif
5585 #if defined(PETSC_HAVE_SUPERLU_DIST)
5586   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5587 #endif
5588 #if defined(PETSC_HAVE_CLIQUE)
5589   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5590 #endif
5591   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5592   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5593   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5594   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5595   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5596   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5597   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5598   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5601   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5604   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5605   PetscFunctionReturn(0);
5606 }
5607 
5608 #undef __FUNCT__
5609 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5610 /*@
5611      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5612          and "off-diagonal" part of the matrix in CSR format.
5613 
5614    Collective on MPI_Comm
5615 
5616    Input Parameters:
5617 +  comm - MPI communicator
5618 .  m - number of local rows (Cannot be PETSC_DECIDE)
5619 .  n - This value should be the same as the local size used in creating the
5620        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5621        calculated if N is given) For square matrices n is almost always m.
5622 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5623 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5624 .   i - row indices for "diagonal" portion of matrix
5625 .   j - column indices
5626 .   a - matrix values
5627 .   oi - row indices for "off-diagonal" portion of matrix
5628 .   oj - column indices
5629 -   oa - matrix values
5630 
5631    Output Parameter:
5632 .   mat - the matrix
5633 
5634    Level: advanced
5635 
5636    Notes:
5637        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5638        must free the arrays once the matrix has been destroyed and not before.
5639 
5640        The i and j indices are 0 based
5641 
5642        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5643 
5644        This sets local rows and cannot be used to set off-processor values.
5645 
5646        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5647        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5648        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5649        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5650        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5651        communication if it is known that only local entries will be set.
5652 
5653 .keywords: matrix, aij, compressed row, sparse, parallel
5654 
5655 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5656           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5657 @*/
5658 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5659 {
5660   PetscErrorCode ierr;
5661   Mat_MPIAIJ     *maij;
5662 
5663   PetscFunctionBegin;
5664   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5665   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5666   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5667   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5668   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5669   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5670   maij = (Mat_MPIAIJ*) (*mat)->data;
5671 
5672   (*mat)->preallocated = PETSC_TRUE;
5673 
5674   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5675   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5676 
5677   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5678   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5679 
5680   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5681   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5682   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5683   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5684 
5685   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5686   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5687   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5688   PetscFunctionReturn(0);
5689 }
5690 
5691 /*
5692     Special version for direct calls from Fortran
5693 */
5694 #include <petsc-private/fortranimpl.h>
5695 
5696 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5697 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5698 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5699 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5700 #endif
5701 
5702 /* Change these macros so can be used in void function */
5703 #undef CHKERRQ
5704 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5705 #undef SETERRQ2
5706 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5707 #undef SETERRQ3
5708 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5709 #undef SETERRQ
5710 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5711 
5712 #undef __FUNCT__
5713 #define __FUNCT__ "matsetvaluesmpiaij_"
5714 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5715 {
5716   Mat            mat  = *mmat;
5717   PetscInt       m    = *mm, n = *mn;
5718   InsertMode     addv = *maddv;
5719   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5720   PetscScalar    value;
5721   PetscErrorCode ierr;
5722 
5723   MatCheckPreallocated(mat,1);
5724   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5725 
5726 #if defined(PETSC_USE_DEBUG)
5727   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5728 #endif
5729   {
5730     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5731     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5732     PetscBool roworiented = aij->roworiented;
5733 
5734     /* Some Variables required in the macro */
5735     Mat        A                 = aij->A;
5736     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5737     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5738     MatScalar  *aa               = a->a;
5739     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5740     Mat        B                 = aij->B;
5741     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5742     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5743     MatScalar  *ba               = b->a;
5744 
5745     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5746     PetscInt  nonew = a->nonew;
5747     MatScalar *ap1,*ap2;
5748 
5749     PetscFunctionBegin;
5750     for (i=0; i<m; i++) {
5751       if (im[i] < 0) continue;
5752 #if defined(PETSC_USE_DEBUG)
5753       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5754 #endif
5755       if (im[i] >= rstart && im[i] < rend) {
5756         row      = im[i] - rstart;
5757         lastcol1 = -1;
5758         rp1      = aj + ai[row];
5759         ap1      = aa + ai[row];
5760         rmax1    = aimax[row];
5761         nrow1    = ailen[row];
5762         low1     = 0;
5763         high1    = nrow1;
5764         lastcol2 = -1;
5765         rp2      = bj + bi[row];
5766         ap2      = ba + bi[row];
5767         rmax2    = bimax[row];
5768         nrow2    = bilen[row];
5769         low2     = 0;
5770         high2    = nrow2;
5771 
5772         for (j=0; j<n; j++) {
5773           if (roworiented) value = v[i*n+j];
5774           else value = v[i+j*m];
5775           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5776           if (in[j] >= cstart && in[j] < cend) {
5777             col = in[j] - cstart;
5778             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5779           } else if (in[j] < 0) continue;
5780 #if defined(PETSC_USE_DEBUG)
5781           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5782 #endif
5783           else {
5784             if (mat->was_assembled) {
5785               if (!aij->colmap) {
5786                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5787               }
5788 #if defined(PETSC_USE_CTABLE)
5789               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5790               col--;
5791 #else
5792               col = aij->colmap[in[j]] - 1;
5793 #endif
5794               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5795                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5796                 col  =  in[j];
5797                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5798                 B     = aij->B;
5799                 b     = (Mat_SeqAIJ*)B->data;
5800                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5801                 rp2   = bj + bi[row];
5802                 ap2   = ba + bi[row];
5803                 rmax2 = bimax[row];
5804                 nrow2 = bilen[row];
5805                 low2  = 0;
5806                 high2 = nrow2;
5807                 bm    = aij->B->rmap->n;
5808                 ba    = b->a;
5809               }
5810             } else col = in[j];
5811             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5812           }
5813         }
5814       } else if (!aij->donotstash) {
5815         if (roworiented) {
5816           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5817         } else {
5818           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5819         }
5820       }
5821     }
5822   }
5823   PetscFunctionReturnVoid();
5824 }
5825 
5826