xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision f38d543f014061d7d9693584ad5578cc37639c60)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJCRL, and also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc((M->rmap->n-cnt)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscMalloc(n*sizeof(PetscReal),&work);CHKERRQ(ierr);
138   ierr = PetscMemzero(work,n*sizeof(PetscReal));CHKERRQ(ierr);
139   if (type == NORM_2) {
140     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
141       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
142     }
143     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
144       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
145     }
146   } else if (type == NORM_1) {
147     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
148       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
149     }
150     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
151       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
152     }
153   } else if (type == NORM_INFINITY) {
154     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
155       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
156     }
157     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
158       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
159     }
160 
161   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
162   if (type == NORM_INFINITY) {
163     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
164   } else {
165     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
166   }
167   ierr = PetscFree(work);CHKERRQ(ierr);
168   if (type == NORM_2) {
169     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
170   }
171   PetscFunctionReturn(0);
172 }
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDistribute_MPIAIJ"
176 /*
177     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
178     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
179 
180     Only for square matrices
181 
182     Used by a preconditioner, hence PETSC_EXTERN
183 */
184 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
185 {
186   PetscMPIInt    rank,size;
187   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz,*gmataj,cnt,row,*ld,bses[2];
188   PetscErrorCode ierr;
189   Mat            mat;
190   Mat_SeqAIJ     *gmata;
191   PetscMPIInt    tag;
192   MPI_Status     status;
193   PetscBool      aij;
194   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
195 
196   PetscFunctionBegin;
197   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
199   if (!rank) {
200     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
201     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
202   }
203   if (reuse == MAT_INITIAL_MATRIX) {
204     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
205     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
206     if (!rank) {
207       bses[0] = gmat->rmap->bs;
208       bses[1] = gmat->cmap->bs;
209     }
210     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
211     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
212     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
213     ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
214     ierr = PetscMalloc2(m,PetscInt,&dlens,m,PetscInt,&olens);CHKERRQ(ierr);
215     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
216 
217     rowners[0] = 0;
218     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
219     rstart = rowners[rank];
220     rend   = rowners[rank+1];
221     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
222     if (!rank) {
223       gmata = (Mat_SeqAIJ*) gmat->data;
224       /* send row lengths to all processors */
225       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
226       for (i=1; i<size; i++) {
227         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
228       }
229       /* determine number diagonal and off-diagonal counts */
230       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
231       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
232       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
233       jj   = 0;
234       for (i=0; i<m; i++) {
235         for (j=0; j<dlens[i]; j++) {
236           if (gmata->j[jj] < rstart) ld[i]++;
237           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
238           jj++;
239         }
240       }
241       /* send column indices to other processes */
242       for (i=1; i<size; i++) {
243         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
244         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
245         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
246       }
247 
248       /* send numerical values to other processes */
249       for (i=1; i<size; i++) {
250         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
251         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
252       }
253       gmataa = gmata->a;
254       gmataj = gmata->j;
255 
256     } else {
257       /* receive row lengths */
258       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
259       /* receive column indices */
260       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
261       ierr = PetscMalloc2(nz,PetscScalar,&gmataa,nz,PetscInt,&gmataj);CHKERRQ(ierr);
262       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
263       /* determine number diagonal and off-diagonal counts */
264       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
265       ierr = PetscMalloc(m*sizeof(PetscInt),&ld);CHKERRQ(ierr);
266       ierr = PetscMemzero(ld,m*sizeof(PetscInt));CHKERRQ(ierr);
267       jj   = 0;
268       for (i=0; i<m; i++) {
269         for (j=0; j<dlens[i]; j++) {
270           if (gmataj[jj] < rstart) ld[i]++;
271           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
272           jj++;
273         }
274       }
275       /* receive numerical values */
276       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
277       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
278     }
279     /* set preallocation */
280     for (i=0; i<m; i++) {
281       dlens[i] -= olens[i];
282     }
283     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
284     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
285 
286     for (i=0; i<m; i++) {
287       dlens[i] += olens[i];
288     }
289     cnt = 0;
290     for (i=0; i<m; i++) {
291       row  = rstart + i;
292       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
293       cnt += dlens[i];
294     }
295     if (rank) {
296       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
297     }
298     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
299     ierr = PetscFree(rowners);CHKERRQ(ierr);
300 
301     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
302 
303     *inmat = mat;
304   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
305     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
306     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
307     mat  = *inmat;
308     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
309     if (!rank) {
310       /* send numerical values to other processes */
311       gmata  = (Mat_SeqAIJ*) gmat->data;
312       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
313       gmataa = gmata->a;
314       for (i=1; i<size; i++) {
315         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
316         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
317       }
318       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
319     } else {
320       /* receive numerical values from process 0*/
321       nz   = Ad->nz + Ao->nz;
322       ierr = PetscMalloc(nz*sizeof(PetscScalar),&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
323       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
324     }
325     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
326     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
327     ad = Ad->a;
328     ao = Ao->a;
329     if (mat->rmap->n) {
330       i  = 0;
331       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
332       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
333     }
334     for (i=1; i<mat->rmap->n; i++) {
335       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
336       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
337     }
338     i--;
339     if (mat->rmap->n) {
340       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
341     }
342     if (rank) {
343       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
344     }
345   }
346   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
347   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 /*
352   Local utility routine that creates a mapping from the global column
353 number to the local number in the off-diagonal part of the local
354 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
355 a slightly higher hash table cost; without it it is not scalable (each processor
356 has an order N integer array but is fast to acess.
357 */
358 #undef __FUNCT__
359 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
360 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
361 {
362   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       n = aij->B->cmap->n,i;
365 
366   PetscFunctionBegin;
367   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
368 #if defined(PETSC_USE_CTABLE)
369   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
370   for (i=0; i<n; i++) {
371     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
372   }
373 #else
374   ierr = PetscMalloc((mat->cmap->N+1)*sizeof(PetscInt),&aij->colmap);CHKERRQ(ierr);
375   ierr = PetscLogObjectMemory(mat,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
376   ierr = PetscMemzero(aij->colmap,mat->cmap->N*sizeof(PetscInt));CHKERRQ(ierr);
377   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
378 #endif
379   PetscFunctionReturn(0);
380 }
381 
382 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
383 { \
384     if (col <= lastcol1)  low1 = 0;     \
385     else                 high1 = nrow1; \
386     lastcol1 = col;\
387     while (high1-low1 > 5) { \
388       t = (low1+high1)/2; \
389       if (rp1[t] > col) high1 = t; \
390       else              low1  = t; \
391     } \
392       for (_i=low1; _i<high1; _i++) { \
393         if (rp1[_i] > col) break; \
394         if (rp1[_i] == col) { \
395           if (addv == ADD_VALUES) ap1[_i] += value;   \
396           else                    ap1[_i] = value; \
397           goto a_noinsert; \
398         } \
399       }  \
400       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
401       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
402       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
403       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
404       N = nrow1++ - 1; a->nz++; high1++; \
405       /* shift up all the later entries in this row */ \
406       for (ii=N; ii>=_i; ii--) { \
407         rp1[ii+1] = rp1[ii]; \
408         ap1[ii+1] = ap1[ii]; \
409       } \
410       rp1[_i] = col;  \
411       ap1[_i] = value;  \
412       a_noinsert: ; \
413       ailen[row] = nrow1; \
414 }
415 
416 
417 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
418   { \
419     if (col <= lastcol2) low2 = 0;                        \
420     else high2 = nrow2;                                   \
421     lastcol2 = col;                                       \
422     while (high2-low2 > 5) {                              \
423       t = (low2+high2)/2;                                 \
424       if (rp2[t] > col) high2 = t;                        \
425       else             low2  = t;                         \
426     }                                                     \
427     for (_i=low2; _i<high2; _i++) {                       \
428       if (rp2[_i] > col) break;                           \
429       if (rp2[_i] == col) {                               \
430         if (addv == ADD_VALUES) ap2[_i] += value;         \
431         else                    ap2[_i] = value;          \
432         goto b_noinsert;                                  \
433       }                                                   \
434     }                                                     \
435     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
436     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
437     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
438     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
439     N = nrow2++ - 1; b->nz++; high2++;                    \
440     /* shift up all the later entries in this row */      \
441     for (ii=N; ii>=_i; ii--) {                            \
442       rp2[ii+1] = rp2[ii];                                \
443       ap2[ii+1] = ap2[ii];                                \
444     }                                                     \
445     rp2[_i] = col;                                        \
446     ap2[_i] = value;                                      \
447     b_noinsert: ;                                         \
448     bilen[row] = nrow2;                                   \
449   }
450 
451 #undef __FUNCT__
452 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
453 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
454 {
455   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
456   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
457   PetscErrorCode ierr;
458   PetscInt       l,*garray = mat->garray,diag;
459 
460   PetscFunctionBegin;
461   /* code only works for square matrices A */
462 
463   /* find size of row to the left of the diagonal part */
464   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
465   row  = row - diag;
466   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
467     if (garray[b->j[b->i[row]+l]] > diag) break;
468   }
469   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
470 
471   /* diagonal part */
472   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
473 
474   /* right of diagonal part */
475   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
476   PetscFunctionReturn(0);
477 }
478 
479 #undef __FUNCT__
480 #define __FUNCT__ "MatSetValues_MPIAIJ"
481 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
482 {
483   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
484   PetscScalar    value;
485   PetscErrorCode ierr;
486   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
487   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
488   PetscBool      roworiented = aij->roworiented;
489 
490   /* Some Variables required in the macro */
491   Mat        A                 = aij->A;
492   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
493   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
494   MatScalar  *aa               = a->a;
495   PetscBool  ignorezeroentries = a->ignorezeroentries;
496   Mat        B                 = aij->B;
497   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
498   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
499   MatScalar  *ba               = b->a;
500 
501   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
502   PetscInt  nonew;
503   MatScalar *ap1,*ap2;
504 
505   PetscFunctionBegin;
506   if (v) PetscValidScalarPointer(v,6);
507   for (i=0; i<m; i++) {
508     if (im[i] < 0) continue;
509 #if defined(PETSC_USE_DEBUG)
510     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
511 #endif
512     if (im[i] >= rstart && im[i] < rend) {
513       row      = im[i] - rstart;
514       lastcol1 = -1;
515       rp1      = aj + ai[row];
516       ap1      = aa + ai[row];
517       rmax1    = aimax[row];
518       nrow1    = ailen[row];
519       low1     = 0;
520       high1    = nrow1;
521       lastcol2 = -1;
522       rp2      = bj + bi[row];
523       ap2      = ba + bi[row];
524       rmax2    = bimax[row];
525       nrow2    = bilen[row];
526       low2     = 0;
527       high2    = nrow2;
528 
529       for (j=0; j<n; j++) {
530         if (v) {
531           if (roworiented) value = v[i*n+j];
532           else             value = v[i+j*m];
533         } else value = 0.0;
534         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
535         if (in[j] >= cstart && in[j] < cend) {
536           col   = in[j] - cstart;
537           nonew = a->nonew;
538           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
539         } else if (in[j] < 0) continue;
540 #if defined(PETSC_USE_DEBUG)
541         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
542 #endif
543         else {
544           if (mat->was_assembled) {
545             if (!aij->colmap) {
546               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
547             }
548 #if defined(PETSC_USE_CTABLE)
549             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
550             col--;
551 #else
552             col = aij->colmap[in[j]] - 1;
553 #endif
554             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
555               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
556               col  =  in[j];
557               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
558               B     = aij->B;
559               b     = (Mat_SeqAIJ*)B->data;
560               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
561               rp2   = bj + bi[row];
562               ap2   = ba + bi[row];
563               rmax2 = bimax[row];
564               nrow2 = bilen[row];
565               low2  = 0;
566               high2 = nrow2;
567               bm    = aij->B->rmap->n;
568               ba    = b->a;
569             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
570           } else col = in[j];
571           nonew = b->nonew;
572           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
573         }
574       }
575     } else {
576       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
577       if (!aij->donotstash) {
578         mat->assembled = PETSC_FALSE;
579         if (roworiented) {
580           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
581         } else {
582           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
583         }
584       }
585     }
586   }
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatGetValues_MPIAIJ"
592 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
593 {
594   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
595   PetscErrorCode ierr;
596   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
597   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
598 
599   PetscFunctionBegin;
600   for (i=0; i<m; i++) {
601     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
602     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
603     if (idxm[i] >= rstart && idxm[i] < rend) {
604       row = idxm[i] - rstart;
605       for (j=0; j<n; j++) {
606         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
607         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
608         if (idxn[j] >= cstart && idxn[j] < cend) {
609           col  = idxn[j] - cstart;
610           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
611         } else {
612           if (!aij->colmap) {
613             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
614           }
615 #if defined(PETSC_USE_CTABLE)
616           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
617           col--;
618 #else
619           col = aij->colmap[idxn[j]] - 1;
620 #endif
621           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
622           else {
623             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
624           }
625         }
626       }
627     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
636 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
639   PetscErrorCode ierr;
640   PetscInt       nstash,reallocs;
641   InsertMode     addv;
642 
643   PetscFunctionBegin;
644   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
645 
646   /* make sure all processors are either in INSERTMODE or ADDMODE */
647   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
648   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
649   mat->insertmode = addv; /* in case this processor had no cache */
650 
651   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
652   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
653   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
654   PetscFunctionReturn(0);
655 }
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
659 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
660 {
661   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
662   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
663   PetscErrorCode ierr;
664   PetscMPIInt    n;
665   PetscInt       i,j,rstart,ncols,flg;
666   PetscInt       *row,*col;
667   PetscBool      other_disassembled;
668   PetscScalar    *val;
669   InsertMode     addv = mat->insertmode;
670 
671   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
672 
673   PetscFunctionBegin;
674   if (!aij->donotstash && !mat->nooffprocentries) {
675     while (1) {
676       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
677       if (!flg) break;
678 
679       for (i=0; i<n; ) {
680         /* Now identify the consecutive vals belonging to the same row */
681         for (j=i,rstart=row[j]; j<n; j++) {
682           if (row[j] != rstart) break;
683         }
684         if (j < n) ncols = j-i;
685         else       ncols = n-i;
686         /* Now assemble all these values with a single function call */
687         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
688 
689         i = j;
690       }
691     }
692     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
693   }
694   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
695   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
696 
697   /* determine if any processor has disassembled, if so we must
698      also disassemble ourselfs, in order that we may reassemble. */
699   /*
700      if nonzero structure of submatrix B cannot change then we know that
701      no processor disassembled thus we can skip this stuff
702   */
703   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
704     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
705     if (mat->was_assembled && !other_disassembled) {
706       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
707     }
708   }
709   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
710     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
711   }
712   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
713   ierr = MatSetOption(aij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
714   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
715   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
716 
717   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
718 
719   aij->rowvalues = 0;
720 
721   /* used by MatAXPY() */
722   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
723   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
724 
725   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
726   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
732 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
733 {
734   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
735   PetscErrorCode ierr;
736 
737   PetscFunctionBegin;
738   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
739   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatZeroRows_MPIAIJ"
745 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
746 {
747   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
748   PetscErrorCode    ierr;
749   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
750   PetscInt          i,*owners = A->rmap->range;
751   PetscInt          *nprocs,j,idx,nsends,row;
752   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
753   PetscInt          *rvalues,count,base,slen,*source;
754   PetscInt          *lens,*lrows,*values,rstart=A->rmap->rstart;
755   MPI_Comm          comm;
756   MPI_Request       *send_waits,*recv_waits;
757   MPI_Status        recv_status,*send_status;
758   const PetscScalar *xx;
759   PetscScalar       *bb;
760 #if defined(PETSC_DEBUG)
761   PetscBool found = PETSC_FALSE;
762 #endif
763 
764   PetscFunctionBegin;
765   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
766   /*  first count number of contributors to each processor */
767   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
768   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
769   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
770   j    = 0;
771   for (i=0; i<N; i++) {
772     if (lastidx > (idx = rows[i])) j = 0;
773     lastidx = idx;
774     for (; j<size; j++) {
775       if (idx >= owners[j] && idx < owners[j+1]) {
776         nprocs[2*j]++;
777         nprocs[2*j+1] = 1;
778         owner[i]      = j;
779 #if defined(PETSC_DEBUG)
780         found = PETSC_TRUE;
781 #endif
782         break;
783       }
784     }
785 #if defined(PETSC_DEBUG)
786     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
787     found = PETSC_FALSE;
788 #endif
789   }
790   nsends = 0;
791   for (i=0; i<size; i++) nsends += nprocs[2*i+1];
792 
793   if (A->nooffproczerorows) {
794     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
795     nrecvs = nsends;
796     nmax   = N;
797   } else {
798     /* inform other processors of number of messages and max length*/
799     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
800   }
801 
802   /* post receives:   */
803   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
804   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
805   for (i=0; i<nrecvs; i++) {
806     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
807   }
808 
809   /* do sends:
810       1) starts[i] gives the starting index in svalues for stuff going to
811          the ith processor
812   */
813   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
814   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
815   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
816 
817   starts[0] = 0;
818   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
819   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
820 
821   starts[0] = 0;
822   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
823   count = 0;
824   for (i=0; i<size; i++) {
825     if (nprocs[2*i+1]) {
826       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
827     }
828   }
829   ierr = PetscFree(starts);CHKERRQ(ierr);
830 
831   base = owners[rank];
832 
833   /*  wait on receives */
834   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
835   count = nrecvs; slen = 0;
836   while (count) {
837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
838     /* unpack receives into our local space */
839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
840 
841     source[imdex] = recv_status.MPI_SOURCE;
842     lens[imdex]   = n;
843     slen         += n;
844     count--;
845   }
846   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
847 
848   /* move the data into the send scatter */
849   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
850   count = 0;
851   for (i=0; i<nrecvs; i++) {
852     values = rvalues + i*nmax;
853     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
854   }
855   ierr = PetscFree(rvalues);CHKERRQ(ierr);
856   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
857   ierr = PetscFree(owner);CHKERRQ(ierr);
858   ierr = PetscFree(nprocs);CHKERRQ(ierr);
859 
860   /* fix right hand side if needed */
861   if (x && b) {
862     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
863     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
864     for (i=0; i<slen; i++) bb[lrows[i]] = diag*xx[lrows[i]];
865     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
866     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
867   }
868   /*
869         Zero the required rows. If the "diagonal block" of the matrix
870      is square and the user wishes to set the diagonal we use separate
871      code so that MatSetValues() is not called for each diagonal allocating
872      new memory, thus calling lots of mallocs and slowing things down.
873 
874   */
875   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
876   ierr = MatZeroRows(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
877   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
878     ierr = MatZeroRows(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
879   } else if (diag != 0.0) {
880     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
881     if (((Mat_SeqAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
882     for (i = 0; i < slen; i++) {
883       row  = lrows[i] + rstart;
884       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
885     }
886     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
887     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   } else {
889     ierr = MatZeroRows(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
890   }
891   ierr = PetscFree(lrows);CHKERRQ(ierr);
892 
893   /* wait on sends */
894   if (nsends) {
895     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
896     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
897     ierr = PetscFree(send_status);CHKERRQ(ierr);
898   }
899   ierr = PetscFree(send_waits);CHKERRQ(ierr);
900   ierr = PetscFree(svalues);CHKERRQ(ierr);
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
906 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
907 {
908   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
909   PetscErrorCode    ierr;
910   PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
911   PetscInt          i,*owners = A->rmap->range;
912   PetscInt          *nprocs,j,idx,nsends;
913   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
914   PetscInt          *rvalues,count,base,slen,*source;
915   PetscInt          *lens,*lrows,*values,m;
916   MPI_Comm          comm;
917   MPI_Request       *send_waits,*recv_waits;
918   MPI_Status        recv_status,*send_status;
919   const PetscScalar *xx;
920   PetscScalar       *bb,*mask;
921   Vec               xmask,lmask;
922   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
923   const PetscInt    *aj, *ii,*ridx;
924   PetscScalar       *aa;
925 #if defined(PETSC_DEBUG)
926   PetscBool found = PETSC_FALSE;
927 #endif
928 
929   PetscFunctionBegin;
930   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
931   /*  first count number of contributors to each processor */
932   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
933   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
934   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
935   j    = 0;
936   for (i=0; i<N; i++) {
937     if (lastidx > (idx = rows[i])) j = 0;
938     lastidx = idx;
939     for (; j<size; j++) {
940       if (idx >= owners[j] && idx < owners[j+1]) {
941         nprocs[2*j]++;
942         nprocs[2*j+1] = 1;
943         owner[i]      = j;
944 #if defined(PETSC_DEBUG)
945         found = PETSC_TRUE;
946 #endif
947         break;
948       }
949     }
950 #if defined(PETSC_DEBUG)
951     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
952     found = PETSC_FALSE;
953 #endif
954   }
955   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
956 
957   /* inform other processors of number of messages and max length*/
958   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
959 
960   /* post receives:   */
961   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
962   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
963   for (i=0; i<nrecvs; i++) {
964     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
965   }
966 
967   /* do sends:
968       1) starts[i] gives the starting index in svalues for stuff going to
969          the ith processor
970   */
971   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
972   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
973   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
974 
975   starts[0] = 0;
976   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
977   for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];
978 
979   starts[0] = 0;
980   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
981   count = 0;
982   for (i=0; i<size; i++) {
983     if (nprocs[2*i+1]) {
984       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
985     }
986   }
987   ierr = PetscFree(starts);CHKERRQ(ierr);
988 
989   base = owners[rank];
990 
991   /*  wait on receives */
992   ierr  = PetscMalloc2(nrecvs,PetscInt,&lens,nrecvs,PetscInt,&source);CHKERRQ(ierr);
993   count = nrecvs; slen = 0;
994   while (count) {
995     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
996     /* unpack receives into our local space */
997     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
998 
999     source[imdex] = recv_status.MPI_SOURCE;
1000     lens[imdex]   = n;
1001     slen         += n;
1002     count--;
1003   }
1004   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1005 
1006   /* move the data into the send scatter */
1007   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1008   count = 0;
1009   for (i=0; i<nrecvs; i++) {
1010     values = rvalues + i*nmax;
1011     for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
1012   }
1013   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1014   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1015   ierr = PetscFree(owner);CHKERRQ(ierr);
1016   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1017   /* lrows are the local rows to be zeroed, slen is the number of local rows */
1018 
1019   /* zero diagonal part of matrix */
1020   ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);
1021 
1022   /* handle off diagonal part of matrix */
1023   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1024   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1025   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1026   for (i=0; i<slen; i++) bb[lrows[i]] = 1;
1027   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1028   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1029   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1030   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1031   if (x) {
1032     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1033     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1034     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1035     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1036   }
1037   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1038 
1039   /* remove zeroed rows of off diagonal matrix */
1040   ii = aij->i;
1041   for (i=0; i<slen; i++) {
1042     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
1043   }
1044 
1045   /* loop over all elements of off process part of matrix zeroing removed columns*/
1046   if (aij->compressedrow.use) {
1047     m    = aij->compressedrow.nrows;
1048     ii   = aij->compressedrow.i;
1049     ridx = aij->compressedrow.rindex;
1050     for (i=0; i<m; i++) {
1051       n  = ii[i+1] - ii[i];
1052       aj = aij->j + ii[i];
1053       aa = aij->a + ii[i];
1054 
1055       for (j=0; j<n; j++) {
1056         if (PetscAbsScalar(mask[*aj])) {
1057           if (b) bb[*ridx] -= *aa*xx[*aj];
1058           *aa = 0.0;
1059         }
1060         aa++;
1061         aj++;
1062       }
1063       ridx++;
1064     }
1065   } else { /* do not use compressed row format */
1066     m = l->B->rmap->n;
1067     for (i=0; i<m; i++) {
1068       n  = ii[i+1] - ii[i];
1069       aj = aij->j + ii[i];
1070       aa = aij->a + ii[i];
1071       for (j=0; j<n; j++) {
1072         if (PetscAbsScalar(mask[*aj])) {
1073           if (b) bb[i] -= *aa*xx[*aj];
1074           *aa = 0.0;
1075         }
1076         aa++;
1077         aj++;
1078       }
1079     }
1080   }
1081   if (x) {
1082     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1083     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1084   }
1085   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1086   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1087   ierr = PetscFree(lrows);CHKERRQ(ierr);
1088 
1089   /* wait on sends */
1090   if (nsends) {
1091     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1092     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1093     ierr = PetscFree(send_status);CHKERRQ(ierr);
1094   }
1095   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1096   ierr = PetscFree(svalues);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMult_MPIAIJ"
1102 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1105   PetscErrorCode ierr;
1106   PetscInt       nt;
1107 
1108   PetscFunctionBegin;
1109   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1110   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1111   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1113   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1114   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1121 {
1122   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1132 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1139   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1140   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1141   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1147 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1148 {
1149   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1150   PetscErrorCode ierr;
1151   PetscBool      merged;
1152 
1153   PetscFunctionBegin;
1154   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1155   /* do nondiagonal part */
1156   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1157   if (!merged) {
1158     /* send it on its way */
1159     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1160     /* do local part */
1161     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1162     /* receive remote parts: note this assumes the values are not actually */
1163     /* added in yy until the next line, */
1164     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1165   } else {
1166     /* do local part */
1167     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1168     /* send it on its way */
1169     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1170     /* values actually were received in the Begin() but we need to call this nop */
1171     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1178 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1179 {
1180   MPI_Comm       comm;
1181   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1182   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1183   IS             Me,Notme;
1184   PetscErrorCode ierr;
1185   PetscInt       M,N,first,last,*notme,i;
1186   PetscMPIInt    size;
1187 
1188   PetscFunctionBegin;
1189   /* Easy test: symmetric diagonal block */
1190   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1191   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1192   if (!*f) PetscFunctionReturn(0);
1193   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1195   if (size == 1) PetscFunctionReturn(0);
1196 
1197   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1198   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1199   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1200   ierr = PetscMalloc((N-last+first)*sizeof(PetscInt),&notme);CHKERRQ(ierr);
1201   for (i=0; i<first; i++) notme[i] = i;
1202   for (i=last; i<M; i++) notme[i-last+first] = i;
1203   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1204   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1205   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1206   Aoff = Aoffs[0];
1207   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1208   Boff = Boffs[0];
1209   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1210   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1211   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1212   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1213   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1214   ierr = PetscFree(notme);CHKERRQ(ierr);
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1220 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1221 {
1222   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   /* do nondiagonal part */
1227   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1228   /* send it on its way */
1229   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1230   /* do local part */
1231   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1232   /* receive remote parts */
1233   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1234   PetscFunctionReturn(0);
1235 }
1236 
1237 /*
1238   This only works correctly for square matrices where the subblock A->A is the
1239    diagonal block
1240 */
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1243 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1244 {
1245   PetscErrorCode ierr;
1246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1247 
1248   PetscFunctionBegin;
1249   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1250   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1251   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatScale_MPIAIJ"
1257 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1258 {
1259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1260   PetscErrorCode ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1264   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatDestroy_MPIAIJ"
1270 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1271 {
1272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276 #if defined(PETSC_USE_LOG)
1277   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1278 #endif
1279   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1280   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1281   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1282   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1283 #if defined(PETSC_USE_CTABLE)
1284   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1285 #else
1286   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1287 #endif
1288   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1289   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1290   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1291   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1292   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1310 {
1311   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1312   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1313   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1314   PetscErrorCode ierr;
1315   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1316   int            fd;
1317   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1318   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz;
1319   PetscScalar    *column_values;
1320   PetscInt       message_count,flowcontrolcount;
1321   FILE           *file;
1322 
1323   PetscFunctionBegin;
1324   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1325   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1326   nz   = A->nz + B->nz;
1327   if (!rank) {
1328     header[0] = MAT_FILE_CLASSID;
1329     header[1] = mat->rmap->N;
1330     header[2] = mat->cmap->N;
1331 
1332     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1333     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1334     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1335     /* get largest number of rows any processor has */
1336     rlen  = mat->rmap->n;
1337     range = mat->rmap->range;
1338     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1339   } else {
1340     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1341     rlen = mat->rmap->n;
1342   }
1343 
1344   /* load up the local row counts */
1345   ierr = PetscMalloc((rlen+1)*sizeof(PetscInt),&row_lengths);CHKERRQ(ierr);
1346   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1347 
1348   /* store the row lengths to the file */
1349   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1350   if (!rank) {
1351     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1352     for (i=1; i<size; i++) {
1353       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1354       rlen = range[i+1] - range[i];
1355       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1356       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1357     }
1358     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1359   } else {
1360     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1361     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1362     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1363   }
1364   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1365 
1366   /* load up the local column indices */
1367   nzmax = nz; /* th processor needs space a largest processor needs */
1368   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1369   ierr  = PetscMalloc((nzmax+1)*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1370   cnt   = 0;
1371   for (i=0; i<mat->rmap->n; i++) {
1372     for (j=B->i[i]; j<B->i[i+1]; j++) {
1373       if ((col = garray[B->j[j]]) > cstart) break;
1374       column_indices[cnt++] = col;
1375     }
1376     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1377     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1378   }
1379   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1380 
1381   /* store the column indices to the file */
1382   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1383   if (!rank) {
1384     MPI_Status status;
1385     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1386     for (i=1; i<size; i++) {
1387       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1388       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1389       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1390       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1391       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1392     }
1393     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1394   } else {
1395     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1396     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1397     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1398     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1399   }
1400   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1401 
1402   /* load up the local column values */
1403   ierr = PetscMalloc((nzmax+1)*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1404   cnt  = 0;
1405   for (i=0; i<mat->rmap->n; i++) {
1406     for (j=B->i[i]; j<B->i[i+1]; j++) {
1407       if (garray[B->j[j]] > cstart) break;
1408       column_values[cnt++] = B->a[j];
1409     }
1410     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1411     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1412   }
1413   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1414 
1415   /* store the column values to the file */
1416   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1417   if (!rank) {
1418     MPI_Status status;
1419     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1420     for (i=1; i<size; i++) {
1421       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1422       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1423       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
1424       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1425       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1426     }
1427     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1428   } else {
1429     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1430     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1431     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1432     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1433   }
1434   ierr = PetscFree(column_values);CHKERRQ(ierr);
1435 
1436   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1437   if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #include <petscdraw.h>
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1444 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1445 {
1446   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1447   PetscErrorCode    ierr;
1448   PetscMPIInt       rank = aij->rank,size = aij->size;
1449   PetscBool         isdraw,iascii,isbinary;
1450   PetscViewer       sviewer;
1451   PetscViewerFormat format;
1452 
1453   PetscFunctionBegin;
1454   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1455   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1456   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1457   if (iascii) {
1458     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1459     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1460       MatInfo   info;
1461       PetscBool inodes;
1462 
1463       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1464       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1465       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1466       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1467       if (!inodes) {
1468         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1469                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1470       } else {
1471         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1472                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1473       }
1474       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1475       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1476       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1477       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1478       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1479       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1480       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1481       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1482       PetscFunctionReturn(0);
1483     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1484       PetscInt inodecount,inodelimit,*inodes;
1485       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1486       if (inodes) {
1487         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1488       } else {
1489         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1490       }
1491       PetscFunctionReturn(0);
1492     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494     }
1495   } else if (isbinary) {
1496     if (size == 1) {
1497       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1498       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1499     } else {
1500       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1501     }
1502     PetscFunctionReturn(0);
1503   } else if (isdraw) {
1504     PetscDraw draw;
1505     PetscBool isnull;
1506     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1507     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1508   }
1509 
1510   if (size == 1) {
1511     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1512     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1513   } else {
1514     /* assemble the entire matrix onto first processor. */
1515     Mat        A;
1516     Mat_SeqAIJ *Aloc;
1517     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1518     MatScalar  *a;
1519 
1520     if (mat->rmap->N > 1024) {
1521       PetscBool flg = PETSC_FALSE;
1522 
1523       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1524       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1525     }
1526 
1527     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1528     if (!rank) {
1529       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1530     } else {
1531       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1532     }
1533     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1534     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1535     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1536     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1537     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1538 
1539     /* copy over the A part */
1540     Aloc = (Mat_SeqAIJ*)aij->A->data;
1541     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1542     row  = mat->rmap->rstart;
1543     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1544     for (i=0; i<m; i++) {
1545       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1546       row++;
1547       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1548     }
1549     aj = Aloc->j;
1550     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1551 
1552     /* copy over the B part */
1553     Aloc = (Mat_SeqAIJ*)aij->B->data;
1554     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1555     row  = mat->rmap->rstart;
1556     ierr = PetscMalloc((ai[m]+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
1557     ct   = cols;
1558     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1559     for (i=0; i<m; i++) {
1560       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1561       row++;
1562       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1563     }
1564     ierr = PetscFree(ct);CHKERRQ(ierr);
1565     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1566     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1567     /*
1568        Everyone has to call to draw the matrix since the graphics waits are
1569        synchronized across all processors that share the PetscDraw object
1570     */
1571     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1572     if (!rank) {
1573       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1574       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1575       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1576       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1577     }
1578     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1579     ierr = MatDestroy(&A);CHKERRQ(ierr);
1580   }
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatView_MPIAIJ"
1586 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1587 {
1588   PetscErrorCode ierr;
1589   PetscBool      iascii,isdraw,issocket,isbinary;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1593   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1594   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1595   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1596   if (iascii || isdraw || isbinary || issocket) {
1597     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1598   }
1599   PetscFunctionReturn(0);
1600 }
1601 
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatSOR_MPIAIJ"
1604 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1605 {
1606   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1607   PetscErrorCode ierr;
1608   Vec            bb1 = 0;
1609   PetscBool      hasop;
1610 
1611   PetscFunctionBegin;
1612   if (flag == SOR_APPLY_UPPER) {
1613     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1614     PetscFunctionReturn(0);
1615   }
1616 
1617   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1618     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1619   }
1620 
1621   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1622     if (flag & SOR_ZERO_INITIAL_GUESS) {
1623       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1624       its--;
1625     }
1626 
1627     while (its--) {
1628       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1629       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1630 
1631       /* update rhs: bb1 = bb - B*x */
1632       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1633       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1634 
1635       /* local sweep */
1636       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1637     }
1638   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1639     if (flag & SOR_ZERO_INITIAL_GUESS) {
1640       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1641       its--;
1642     }
1643     while (its--) {
1644       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1645       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1646 
1647       /* update rhs: bb1 = bb - B*x */
1648       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1649       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1650 
1651       /* local sweep */
1652       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1653     }
1654   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1655     if (flag & SOR_ZERO_INITIAL_GUESS) {
1656       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1657       its--;
1658     }
1659     while (its--) {
1660       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1661       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1662 
1663       /* update rhs: bb1 = bb - B*x */
1664       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1665       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1666 
1667       /* local sweep */
1668       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1669     }
1670   } else if (flag & SOR_EISENSTAT) {
1671     Vec xx1;
1672 
1673     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1675 
1676     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1677     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1678     if (!mat->diag) {
1679       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1680       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1681     }
1682     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1683     if (hasop) {
1684       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1685     } else {
1686       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1687     }
1688     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1689 
1690     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1691 
1692     /* local sweep */
1693     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1694     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1695     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1696   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1697 
1698   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatPermute_MPIAIJ"
1704 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1705 {
1706   Mat            aA,aB,Aperm;
1707   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1708   PetscScalar    *aa,*ba;
1709   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1710   PetscSF        rowsf,sf;
1711   IS             parcolp = NULL;
1712   PetscBool      done;
1713   PetscErrorCode ierr;
1714 
1715   PetscFunctionBegin;
1716   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1717   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1718   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1719   ierr = PetscMalloc3(PetscMax(m,n),PetscInt,&work,m,PetscInt,&rdest,n,PetscInt,&cdest);CHKERRQ(ierr);
1720 
1721   /* Invert row permutation to find out where my rows should go */
1722   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1723   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1724   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1725   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1726   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1727   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1728 
1729   /* Invert column permutation to find out where my columns should go */
1730   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1731   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1732   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1733   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1734   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1735   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1736   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1737 
1738   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1739   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1740   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1741 
1742   /* Find out where my gcols should go */
1743   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1744   ierr = PetscMalloc(ng*sizeof(PetscInt),&gcdest);CHKERRQ(ierr);
1745   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1746   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1747   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1748   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1749   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1750   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1751 
1752   ierr = PetscMalloc4(m,PetscInt,&dnnz,m,PetscInt,&onnz,m,PetscInt,&tdnnz,m,PetscInt,&tonnz);CHKERRQ(ierr);
1753   ierr = PetscMemzero(dnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1754   ierr = PetscMemzero(onnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1755   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1756   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1757   for (i=0; i<m; i++) {
1758     PetscInt row = rdest[i],rowner;
1759     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1760     for (j=ai[i]; j<ai[i+1]; j++) {
1761       PetscInt cowner,col = cdest[aj[j]];
1762       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1763       if (rowner == cowner) dnnz[i]++;
1764       else onnz[i]++;
1765     }
1766     for (j=bi[i]; j<bi[i+1]; j++) {
1767       PetscInt cowner,col = gcdest[bj[j]];
1768       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1769       if (rowner == cowner) dnnz[i]++;
1770       else onnz[i]++;
1771     }
1772   }
1773   ierr = PetscMemzero(tdnnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1774   ierr = PetscMemzero(tonnz,m*sizeof(PetscInt));CHKERRQ(ierr);
1775   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1776   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1777   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1778   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1779   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1780 
1781   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1782   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1783   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1784   for (i=0; i<m; i++) {
1785     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1786     PetscInt rowlen;
1787     rowlen = ai[i+1] - ai[i];
1788     for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
1789     ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
1790     rowlen = bi[i+1] - bi[i];
1791     for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
1792     ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
1793   }
1794   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1797   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1798   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1799   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1800   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1801   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1802   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1803   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1804   *B = Aperm;
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1810 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1811 {
1812   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1813   Mat            A    = mat->A,B = mat->B;
1814   PetscErrorCode ierr;
1815   PetscReal      isend[5],irecv[5];
1816 
1817   PetscFunctionBegin;
1818   info->block_size = 1.0;
1819   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1820 
1821   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1822   isend[3] = info->memory;  isend[4] = info->mallocs;
1823 
1824   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1825 
1826   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1827   isend[3] += info->memory;  isend[4] += info->mallocs;
1828   if (flag == MAT_LOCAL) {
1829     info->nz_used      = isend[0];
1830     info->nz_allocated = isend[1];
1831     info->nz_unneeded  = isend[2];
1832     info->memory       = isend[3];
1833     info->mallocs      = isend[4];
1834   } else if (flag == MAT_GLOBAL_MAX) {
1835     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1836 
1837     info->nz_used      = irecv[0];
1838     info->nz_allocated = irecv[1];
1839     info->nz_unneeded  = irecv[2];
1840     info->memory       = irecv[3];
1841     info->mallocs      = irecv[4];
1842   } else if (flag == MAT_GLOBAL_SUM) {
1843     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1844 
1845     info->nz_used      = irecv[0];
1846     info->nz_allocated = irecv[1];
1847     info->nz_unneeded  = irecv[2];
1848     info->memory       = irecv[3];
1849     info->mallocs      = irecv[4];
1850   }
1851   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1852   info->fill_ratio_needed = 0;
1853   info->factor_mallocs    = 0;
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetOption_MPIAIJ"
1859 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1860 {
1861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   switch (op) {
1866   case MAT_NEW_NONZERO_LOCATIONS:
1867   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1868   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1869   case MAT_KEEP_NONZERO_PATTERN:
1870   case MAT_NEW_NONZERO_LOCATION_ERR:
1871   case MAT_USE_INODES:
1872   case MAT_IGNORE_ZERO_ENTRIES:
1873     MatCheckPreallocated(A,1);
1874     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1875     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1876     break;
1877   case MAT_ROW_ORIENTED:
1878     a->roworiented = flg;
1879 
1880     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1881     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1882     break;
1883   case MAT_NEW_DIAGONALS:
1884     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1885     break;
1886   case MAT_IGNORE_OFF_PROC_ENTRIES:
1887     a->donotstash = flg;
1888     break;
1889   case MAT_SPD:
1890     A->spd_set = PETSC_TRUE;
1891     A->spd     = flg;
1892     if (flg) {
1893       A->symmetric                  = PETSC_TRUE;
1894       A->structurally_symmetric     = PETSC_TRUE;
1895       A->symmetric_set              = PETSC_TRUE;
1896       A->structurally_symmetric_set = PETSC_TRUE;
1897     }
1898     break;
1899   case MAT_SYMMETRIC:
1900     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1901     break;
1902   case MAT_STRUCTURALLY_SYMMETRIC:
1903     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1904     break;
1905   case MAT_HERMITIAN:
1906     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1907     break;
1908   case MAT_SYMMETRY_ETERNAL:
1909     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1910     break;
1911   default:
1912     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1913   }
1914   PetscFunctionReturn(0);
1915 }
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatGetRow_MPIAIJ"
1919 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1920 {
1921   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1922   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1923   PetscErrorCode ierr;
1924   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1925   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1926   PetscInt       *cmap,*idx_p;
1927 
1928   PetscFunctionBegin;
1929   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1930   mat->getrowactive = PETSC_TRUE;
1931 
1932   if (!mat->rowvalues && (idx || v)) {
1933     /*
1934         allocate enough space to hold information from the longest row.
1935     */
1936     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1937     PetscInt   max = 1,tmp;
1938     for (i=0; i<matin->rmap->n; i++) {
1939       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1940       if (max < tmp) max = tmp;
1941     }
1942     ierr = PetscMalloc2(max,PetscScalar,&mat->rowvalues,max,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1943   }
1944 
1945   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1946   lrow = row - rstart;
1947 
1948   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1949   if (!v)   {pvA = 0; pvB = 0;}
1950   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1951   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1952   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1953   nztot = nzA + nzB;
1954 
1955   cmap = mat->garray;
1956   if (v  || idx) {
1957     if (nztot) {
1958       /* Sort by increasing column numbers, assuming A and B already sorted */
1959       PetscInt imark = -1;
1960       if (v) {
1961         *v = v_p = mat->rowvalues;
1962         for (i=0; i<nzB; i++) {
1963           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1964           else break;
1965         }
1966         imark = i;
1967         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1968         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1969       }
1970       if (idx) {
1971         *idx = idx_p = mat->rowindices;
1972         if (imark > -1) {
1973           for (i=0; i<imark; i++) {
1974             idx_p[i] = cmap[cworkB[i]];
1975           }
1976         } else {
1977           for (i=0; i<nzB; i++) {
1978             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1979             else break;
1980           }
1981           imark = i;
1982         }
1983         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1984         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1985       }
1986     } else {
1987       if (idx) *idx = 0;
1988       if (v)   *v   = 0;
1989     }
1990   }
1991   *nz  = nztot;
1992   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1993   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1999 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
2000 {
2001   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2002 
2003   PetscFunctionBegin;
2004   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
2005   aij->getrowactive = PETSC_FALSE;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatNorm_MPIAIJ"
2011 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
2012 {
2013   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
2014   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
2015   PetscErrorCode ierr;
2016   PetscInt       i,j,cstart = mat->cmap->rstart;
2017   PetscReal      sum = 0.0;
2018   MatScalar      *v;
2019 
2020   PetscFunctionBegin;
2021   if (aij->size == 1) {
2022     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
2023   } else {
2024     if (type == NORM_FROBENIUS) {
2025       v = amat->a;
2026       for (i=0; i<amat->nz; i++) {
2027         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2028       }
2029       v = bmat->a;
2030       for (i=0; i<bmat->nz; i++) {
2031         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
2032       }
2033       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2034       *norm = PetscSqrtReal(*norm);
2035     } else if (type == NORM_1) { /* max column norm */
2036       PetscReal *tmp,*tmp2;
2037       PetscInt  *jj,*garray = aij->garray;
2038       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
2039       ierr  = PetscMalloc((mat->cmap->N+1)*sizeof(PetscReal),&tmp2);CHKERRQ(ierr);
2040       ierr  = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
2041       *norm = 0.0;
2042       v     = amat->a; jj = amat->j;
2043       for (j=0; j<amat->nz; j++) {
2044         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
2045       }
2046       v = bmat->a; jj = bmat->j;
2047       for (j=0; j<bmat->nz; j++) {
2048         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
2049       }
2050       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2051       for (j=0; j<mat->cmap->N; j++) {
2052         if (tmp2[j] > *norm) *norm = tmp2[j];
2053       }
2054       ierr = PetscFree(tmp);CHKERRQ(ierr);
2055       ierr = PetscFree(tmp2);CHKERRQ(ierr);
2056     } else if (type == NORM_INFINITY) { /* max row norm */
2057       PetscReal ntemp = 0.0;
2058       for (j=0; j<aij->A->rmap->n; j++) {
2059         v   = amat->a + amat->i[j];
2060         sum = 0.0;
2061         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
2062           sum += PetscAbsScalar(*v); v++;
2063         }
2064         v = bmat->a + bmat->i[j];
2065         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
2066           sum += PetscAbsScalar(*v); v++;
2067         }
2068         if (sum > ntemp) ntemp = sum;
2069       }
2070       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2071     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
2072   }
2073   PetscFunctionReturn(0);
2074 }
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatTranspose_MPIAIJ"
2078 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
2079 {
2080   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
2081   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
2082   PetscErrorCode ierr;
2083   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
2084   PetscInt       cstart = A->cmap->rstart,ncol;
2085   Mat            B;
2086   MatScalar      *array;
2087 
2088   PetscFunctionBegin;
2089   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
2090 
2091   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
2092   ai = Aloc->i; aj = Aloc->j;
2093   bi = Bloc->i; bj = Bloc->j;
2094   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
2095     PetscInt             *d_nnz,*g_nnz,*o_nnz;
2096     PetscSFNode          *oloc;
2097     PETSC_UNUSED PetscSF sf;
2098 
2099     ierr = PetscMalloc4(na,PetscInt,&d_nnz,na,PetscInt,&o_nnz,nb,PetscInt,&g_nnz,nb,PetscSFNode,&oloc);CHKERRQ(ierr);
2100     /* compute d_nnz for preallocation */
2101     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2102     for (i=0; i<ai[ma]; i++) {
2103       d_nnz[aj[i]]++;
2104       aj[i] += cstart; /* global col index to be used by MatSetValues() */
2105     }
2106     /* compute local off-diagonal contributions */
2107     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
2108     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
2109     /* map those to global */
2110     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
2111     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
2112     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
2113     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
2114     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2115     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2116     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2117 
2118     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2119     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2120     ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
2121     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2122     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2124   } else {
2125     B    = *matout;
2126     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2127     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2128   }
2129 
2130   /* copy over the A part */
2131   array = Aloc->a;
2132   row   = A->rmap->rstart;
2133   for (i=0; i<ma; i++) {
2134     ncol = ai[i+1]-ai[i];
2135     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2136     row++;
2137     array += ncol; aj += ncol;
2138   }
2139   aj = Aloc->j;
2140   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2141 
2142   /* copy over the B part */
2143   ierr  = PetscMalloc(bi[mb]*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144   ierr  = PetscMemzero(cols,bi[mb]*sizeof(PetscInt));CHKERRQ(ierr);
2145   array = Bloc->a;
2146   row   = A->rmap->rstart;
2147   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2148   cols_tmp = cols;
2149   for (i=0; i<mb; i++) {
2150     ncol = bi[i+1]-bi[i];
2151     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2152     row++;
2153     array += ncol; cols_tmp += ncol;
2154   }
2155   ierr = PetscFree(cols);CHKERRQ(ierr);
2156 
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2160     *matout = B;
2161   } else {
2162     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2169 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2170 {
2171   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2172   Mat            a    = aij->A,b = aij->B;
2173   PetscErrorCode ierr;
2174   PetscInt       s1,s2,s3;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2178   if (rr) {
2179     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2180     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2181     /* Overlap communication with computation. */
2182     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2183   }
2184   if (ll) {
2185     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2186     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2187     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2188   }
2189   /* scale  the diagonal block */
2190   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2191 
2192   if (rr) {
2193     /* Do a scatter end and then right scale the off-diagonal block */
2194     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2195     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2202 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2203 {
2204   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2205   PetscErrorCode ierr;
2206 
2207   PetscFunctionBegin;
2208   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2209   PetscFunctionReturn(0);
2210 }
2211 
2212 #undef __FUNCT__
2213 #define __FUNCT__ "MatEqual_MPIAIJ"
2214 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2215 {
2216   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2217   Mat            a,b,c,d;
2218   PetscBool      flg;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   a = matA->A; b = matA->B;
2223   c = matB->A; d = matB->B;
2224 
2225   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2226   if (flg) {
2227     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2228   }
2229   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2230   PetscFunctionReturn(0);
2231 }
2232 
2233 #undef __FUNCT__
2234 #define __FUNCT__ "MatCopy_MPIAIJ"
2235 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2236 {
2237   PetscErrorCode ierr;
2238   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2239   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2240 
2241   PetscFunctionBegin;
2242   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2243   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2244     /* because of the column compression in the off-processor part of the matrix a->B,
2245        the number of columns in a->B and b->B may be different, hence we cannot call
2246        the MatCopy() directly on the two parts. If need be, we can provide a more
2247        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2248        then copying the submatrices */
2249     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2250   } else {
2251     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2252     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2253   }
2254   PetscFunctionReturn(0);
2255 }
2256 
2257 #undef __FUNCT__
2258 #define __FUNCT__ "MatSetUp_MPIAIJ"
2259 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2260 {
2261   PetscErrorCode ierr;
2262 
2263   PetscFunctionBegin;
2264   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2270 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2271 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2272 {
2273   PetscInt       i,m=Y->rmap->N;
2274   Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
2275   Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
2276   const PetscInt *xi = x->i,*yi = y->i;
2277 
2278   PetscFunctionBegin;
2279   /* Set the number of nonzeros in the new matrix */
2280   for (i=0; i<m; i++) {
2281     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
2282     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
2283     nnz[i] = 0;
2284     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2285       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2286       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2287       nnz[i]++;
2288     }
2289     for (; k<nzy; k++) nnz[i]++;
2290   }
2291   PetscFunctionReturn(0);
2292 }
2293 
2294 #undef __FUNCT__
2295 #define __FUNCT__ "MatAXPY_MPIAIJ"
2296 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2297 {
2298   PetscErrorCode ierr;
2299   PetscInt       i;
2300   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2301   PetscBLASInt   bnz,one=1;
2302   Mat_SeqAIJ     *x,*y;
2303 
2304   PetscFunctionBegin;
2305   if (str == SAME_NONZERO_PATTERN) {
2306     PetscScalar alpha = a;
2307     x    = (Mat_SeqAIJ*)xx->A->data;
2308     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2309     y    = (Mat_SeqAIJ*)yy->A->data;
2310     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2311     x    = (Mat_SeqAIJ*)xx->B->data;
2312     y    = (Mat_SeqAIJ*)yy->B->data;
2313     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2314     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2315   } else if (str == SUBSET_NONZERO_PATTERN) {
2316     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2317 
2318     x = (Mat_SeqAIJ*)xx->B->data;
2319     y = (Mat_SeqAIJ*)yy->B->data;
2320     if (y->xtoy && y->XtoY != xx->B) {
2321       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2322       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2323     }
2324     if (!y->xtoy) { /* get xtoy */
2325       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2326       y->XtoY = xx->B;
2327       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2328     }
2329     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2330   } else {
2331     Mat      B;
2332     PetscInt *nnz_d,*nnz_o;
2333     ierr = PetscMalloc(yy->A->rmap->N*sizeof(PetscInt),&nnz_d);CHKERRQ(ierr);
2334     ierr = PetscMalloc(yy->B->rmap->N*sizeof(PetscInt),&nnz_o);CHKERRQ(ierr);
2335     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2336     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2337     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2338     ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
2339     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2340     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2341     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2342     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2343     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2344     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2345     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2346     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2347   }
2348   PetscFunctionReturn(0);
2349 }
2350 
2351 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatConjugate_MPIAIJ"
2355 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2356 {
2357 #if defined(PETSC_USE_COMPLEX)
2358   PetscErrorCode ierr;
2359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2360 
2361   PetscFunctionBegin;
2362   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2363   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2364 #else
2365   PetscFunctionBegin;
2366 #endif
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatRealPart_MPIAIJ"
2372 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2373 {
2374   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2375   PetscErrorCode ierr;
2376 
2377   PetscFunctionBegin;
2378   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2379   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2385 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2386 {
2387   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2392   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #if defined(PETSC_HAVE_PBGL)
2397 
2398 #include <boost/parallel/mpi/bsp_process_group.hpp>
2399 #include <boost/graph/distributed/ilu_default_graph.hpp>
2400 #include <boost/graph/distributed/ilu_0_block.hpp>
2401 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2402 #include <boost/graph/distributed/petsc/interface.hpp>
2403 #include <boost/multi_array.hpp>
2404 #include <boost/parallel/distributed_property_map->hpp>
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2408 /*
2409   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2410 */
2411 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2412 {
2413   namespace petsc = boost::distributed::petsc;
2414 
2415   namespace graph_dist = boost::graph::distributed;
2416   using boost::graph::distributed::ilu_default::process_group_type;
2417   using boost::graph::ilu_permuted;
2418 
2419   PetscBool      row_identity, col_identity;
2420   PetscContainer c;
2421   PetscInt       m, n, M, N;
2422   PetscErrorCode ierr;
2423 
2424   PetscFunctionBegin;
2425   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2426   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2427   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2428   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2429 
2430   process_group_type pg;
2431   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2432   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2433   lgraph_type& level_graph = *lgraph_p;
2434   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2435 
2436   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2437   ilu_permuted(level_graph);
2438 
2439   /* put together the new matrix */
2440   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2441   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2442   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2443   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2444   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2445   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2446   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2447   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448 
2449   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2450   ierr = PetscContainerSetPointer(c, lgraph_p);
2451   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2452   ierr = PetscContainerDestroy(&c);
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 #undef __FUNCT__
2457 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2458 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2459 {
2460   PetscFunctionBegin;
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSolve_MPIAIJ"
2466 /*
2467   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2468 */
2469 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2470 {
2471   namespace graph_dist = boost::graph::distributed;
2472 
2473   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2474   lgraph_type    *lgraph_p;
2475   PetscContainer c;
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2480   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2481   ierr = VecCopy(b, x);CHKERRQ(ierr);
2482 
2483   PetscScalar *array_x;
2484   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2485   PetscInt sx;
2486   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2487 
2488   PetscScalar *array_b;
2489   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2490   PetscInt sb;
2491   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2492 
2493   lgraph_type& level_graph = *lgraph_p;
2494   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2495 
2496   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2497   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2498   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2499 
2500   typedef boost::iterator_property_map<array_ref_type::iterator,
2501                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2502   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2503   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2504 
2505   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2506   PetscFunctionReturn(0);
2507 }
2508 #endif
2509 
2510 typedef struct { /* used by MatGetRedundantMatrix() for reusing matredundant */
2511   PetscInt    nzlocal,nsends,nrecvs;
2512   PetscMPIInt *send_rank,*recv_rank;
2513   PetscInt    *sbuf_nz,*rbuf_nz,*sbuf_j,**rbuf_j;
2514   PetscScalar *sbuf_a,**rbuf_a;
2515   PetscErrorCode (*Destroy)(Mat);
2516 } Mat_Redundant;
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "PetscContainerDestroy_MatRedundant"
2520 PetscErrorCode PetscContainerDestroy_MatRedundant(void *ptr)
2521 {
2522   PetscErrorCode ierr;
2523   Mat_Redundant  *redund=(Mat_Redundant*)ptr;
2524   PetscInt       i;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
2528   ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
2529   ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
2530   for (i=0; i<redund->nrecvs; i++) {
2531     ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
2532     ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
2533   }
2534   ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
2535   ierr = PetscFree(redund);CHKERRQ(ierr);
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatDestroy_MatRedundant"
2541 PetscErrorCode MatDestroy_MatRedundant(Mat A)
2542 {
2543   PetscErrorCode ierr;
2544   PetscContainer container;
2545   Mat_Redundant  *redund=NULL;
2546 
2547   PetscFunctionBegin;
2548   ierr = PetscObjectQuery((PetscObject)A,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2549   if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2550   ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2551 
2552   A->ops->destroy = redund->Destroy;
2553 
2554   ierr = PetscObjectCompose((PetscObject)A,"Mat_Redundant",0);CHKERRQ(ierr);
2555   if (A->ops->destroy) {
2556     ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2563 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscSubcomm psubcomm,MatReuse reuse,Mat *matredundant)
2564 {
2565   PetscMPIInt    rank,size;
2566   MPI_Comm       comm,subcomm=psubcomm->comm;
2567   PetscErrorCode ierr;
2568   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,nsubcomm=psubcomm->n;
2569   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2570   PetscInt       *rowrange = mat->rmap->range;
2571   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2572   Mat            A = aij->A,B=aij->B,C=*matredundant;
2573   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2574   PetscScalar    *sbuf_a;
2575   PetscInt       nzlocal=a->nz+b->nz;
2576   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2577   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N,m,n;
2578   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2579   MatScalar      *aworkA,*aworkB;
2580   PetscScalar    *vals;
2581   PetscMPIInt    tag1,tag2,tag3,imdex;
2582   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2583   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2584   MPI_Status     recv_status,*send_status;
2585   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2586   PetscInt       **rbuf_j=NULL;
2587   PetscScalar    **rbuf_a=NULL;
2588   Mat_Redundant  *redund =NULL;
2589   PetscContainer container;
2590   PetscInt       rstart_sub,rend_sub,mloc_sub;
2591 
2592   PetscFunctionBegin;
2593   if (psubcomm->type != PETSC_SUBCOMM_INTERLACED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMETYPE,"Input psubcomm must be PETSC_SUBCOMM_INTERLACED type");
2594   ierr = MatGetSize(mat,&M,&N);CHKERRQ(ierr);
2595   if (M != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-square matrix yet, rows %D columns %D",M,N);
2596 
2597   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2598   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2599   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2600 
2601   if (reuse == MAT_REUSE_MATRIX) {
2602     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2603     ierr = MatGetLocalSize(C,&m,&n);CHKERRQ(ierr);
2604     if (m != mat->rmap->n || n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2605     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2606     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2607     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2608     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2609 
2610     nsends    = redund->nsends;
2611     nrecvs    = redund->nrecvs;
2612     send_rank = redund->send_rank;
2613     recv_rank = redund->recv_rank;
2614     sbuf_nz   = redund->sbuf_nz;
2615     rbuf_nz   = redund->rbuf_nz;
2616     sbuf_j    = redund->sbuf_j;
2617     sbuf_a    = redund->sbuf_a;
2618     rbuf_j    = redund->rbuf_j;
2619     rbuf_a    = redund->rbuf_a;
2620   }
2621 
2622   if (reuse == MAT_INITIAL_MATRIX) {
2623     PetscMPIInt subrank,subsize;
2624     PetscInt    nleftover,np_subcomm;
2625 
2626     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2627     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2628 
2629     /* get local size of redundant matrix */
2630     const PetscInt *range;
2631     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2632     rstart_sub = range[nsubcomm*subrank];
2633     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2634       rend_sub = range[nsubcomm*(subrank+1)];
2635     } else {
2636       rend_sub = mat->rmap->N;
2637     }
2638 
2639     mloc_sub = rend_sub - rstart_sub;
2640     /* printf("[%d] rstart_sub %d %d, mloc_sub %d\n",rank,rstart_sub,rend_sub, mloc_sub); */
2641 
2642     /* get the destination processors' id send_rank, nsends and nrecvs */
2643     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2644 
2645     np_subcomm = size/nsubcomm;
2646     nleftover  = size - nsubcomm*np_subcomm;
2647 
2648     nsends = 0; nrecvs = 0;
2649     for (i=0; i<size; i++) {
2650       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2651         send_rank[nsends]   = i; nsends++;
2652         recv_rank[nrecvs++] = i;
2653       }
2654     }
2655     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2656       i = size-nleftover-1;
2657       j = 0;
2658       while (j < nsubcomm - nleftover) {
2659         send_rank[nsends++] = i;
2660         i--; j++;
2661       }
2662     }
2663 
2664     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2665       for (i=0; i<nleftover; i++) {
2666         recv_rank[nrecvs++] = size-nleftover+i;
2667       }
2668     }
2669 
2670     /* allocate sbuf_j, sbuf_a */
2671     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2672     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
2673     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
2674   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2675 
2676   /* copy mat's local entries into the buffers */
2677   if (reuse == MAT_INITIAL_MATRIX) {
2678     rownz_max = 0;
2679     rptr      = sbuf_j;
2680     cols      = sbuf_j + rend-rstart + 1;
2681     vals      = sbuf_a;
2682     rptr[0]   = 0;
2683     for (i=0; i<rend-rstart; i++) {
2684       row    = i + rstart;
2685       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2686       ncols  = nzA + nzB;
2687       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2688       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2689       /* load the column indices for this row into cols */
2690       lwrite = 0;
2691       for (l=0; l<nzB; l++) {
2692         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2693           vals[lwrite]   = aworkB[l];
2694           cols[lwrite++] = ctmp;
2695         }
2696       }
2697       for (l=0; l<nzA; l++) {
2698         vals[lwrite]   = aworkA[l];
2699         cols[lwrite++] = cstart + cworkA[l];
2700       }
2701       for (l=0; l<nzB; l++) {
2702         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2703           vals[lwrite]   = aworkB[l];
2704           cols[lwrite++] = ctmp;
2705         }
2706       }
2707       vals     += ncols;
2708       cols     += ncols;
2709       rptr[i+1] = rptr[i] + ncols;
2710       if (rownz_max < ncols) rownz_max = ncols;
2711     }
2712     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2713   } else { /* only copy matrix values into sbuf_a */
2714     rptr    = sbuf_j;
2715     vals    = sbuf_a;
2716     rptr[0] = 0;
2717     for (i=0; i<rend-rstart; i++) {
2718       row    = i + rstart;
2719       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2720       ncols  = nzA + nzB;
2721       cworkB = b->j + b->i[i];
2722       aworkA = a->a + a->i[i];
2723       aworkB = b->a + b->i[i];
2724       lwrite = 0;
2725       for (l=0; l<nzB; l++) {
2726         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2727       }
2728       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2729       for (l=0; l<nzB; l++) {
2730         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2731       }
2732       vals     += ncols;
2733       rptr[i+1] = rptr[i] + ncols;
2734     }
2735   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2736 
2737   /* send nzlocal to others, and recv other's nzlocal */
2738   /*--------------------------------------------------*/
2739   if (reuse == MAT_INITIAL_MATRIX) {
2740     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2741 
2742     s_waits2 = s_waits3 + nsends;
2743     s_waits1 = s_waits2 + nsends;
2744     r_waits1 = s_waits1 + nsends;
2745     r_waits2 = r_waits1 + nrecvs;
2746     r_waits3 = r_waits2 + nrecvs;
2747   } else {
2748     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
2749 
2750     r_waits3 = s_waits3 + nsends;
2751   }
2752 
2753   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2754   if (reuse == MAT_INITIAL_MATRIX) {
2755     /* get new tags to keep the communication clean */
2756     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2757     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2758     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
2759 
2760     /* post receives of other's nzlocal */
2761     for (i=0; i<nrecvs; i++) {
2762       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2763     }
2764     /* send nzlocal to others */
2765     for (i=0; i<nsends; i++) {
2766       sbuf_nz[i] = nzlocal;
2767       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2768     }
2769     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2770     count = nrecvs;
2771     while (count) {
2772       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2773 
2774       recv_rank[imdex] = recv_status.MPI_SOURCE;
2775       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2776       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
2777 
2778       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2779 
2780       rbuf_nz[imdex] += i + 2;
2781 
2782       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
2783       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2784       count--;
2785     }
2786     /* wait on sends of nzlocal */
2787     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2788     /* send mat->i,j to others, and recv from other's */
2789     /*------------------------------------------------*/
2790     for (i=0; i<nsends; i++) {
2791       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2792       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2793     }
2794     /* wait on receives of mat->i,j */
2795     /*------------------------------*/
2796     count = nrecvs;
2797     while (count) {
2798       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2799       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2800       count--;
2801     }
2802     /* wait on sends of mat->i,j */
2803     /*---------------------------*/
2804     if (nsends) {
2805       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2806     }
2807   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2808 
2809   /* post receives, send and receive mat->a */
2810   /*----------------------------------------*/
2811   for (imdex=0; imdex<nrecvs; imdex++) {
2812     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2813   }
2814   for (i=0; i<nsends; i++) {
2815     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2816   }
2817   count = nrecvs;
2818   while (count) {
2819     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2820     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2821     count--;
2822   }
2823   if (nsends) {
2824     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2825   }
2826 
2827   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2828 
2829   /* create redundant matrix */
2830   /*-------------------------*/
2831   if (reuse == MAT_INITIAL_MATRIX) {
2832     /* compute rownz_max for preallocation */
2833     for (imdex=0; imdex<nrecvs; imdex++) {
2834       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2835       rptr = rbuf_j[imdex];
2836       for (i=0; i<j; i++) {
2837         ncols = rptr[i+1] - rptr[i];
2838         if (rownz_max < ncols) rownz_max = ncols;
2839       }
2840     }
2841 
2842     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2843     ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2844     /* ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DETERMINE,mat->cmap->N);CHKERRQ(ierr); -- give incorrect local column! */
2845     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2846     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2847     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2848     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2849   } else {
2850     C = *matredundant;
2851   }
2852 
2853   /* insert local matrix entries */
2854   rptr = sbuf_j;
2855   cols = sbuf_j + rend-rstart + 1;
2856   vals = sbuf_a;
2857   for (i=0; i<rend-rstart; i++) {
2858     row   = i + rstart;
2859     ncols = rptr[i+1] - rptr[i];
2860     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2861     vals += ncols;
2862     cols += ncols;
2863   }
2864   /* insert received matrix entries */
2865   for (imdex=0; imdex<nrecvs; imdex++) {
2866     rstart = rowrange[recv_rank[imdex]];
2867     rend   = rowrange[recv_rank[imdex]+1];
2868     rptr   = rbuf_j[imdex];
2869     cols   = rbuf_j[imdex] + rend-rstart + 1;
2870     vals   = rbuf_a[imdex];
2871     for (i=0; i<rend-rstart; i++) {
2872       row   = i + rstart;
2873       ncols = rptr[i+1] - rptr[i];
2874       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2875       vals += ncols;
2876       cols += ncols;
2877     }
2878   }
2879   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2880   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2881 
2882   if (reuse == MAT_INITIAL_MATRIX) {
2883     PetscContainer container;
2884     *matredundant = C;
2885     /* create a supporting struct and attach it to C for reuse */
2886     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
2887     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
2888     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
2889     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
2890     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
2891     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
2892 
2893     redund->nzlocal   = nzlocal;
2894     redund->nsends    = nsends;
2895     redund->nrecvs    = nrecvs;
2896     redund->send_rank = send_rank;
2897     redund->recv_rank = recv_rank;
2898     redund->sbuf_nz   = sbuf_nz;
2899     redund->rbuf_nz   = rbuf_nz;
2900     redund->sbuf_j    = sbuf_j;
2901     redund->sbuf_a    = sbuf_a;
2902     redund->rbuf_j    = rbuf_j;
2903     redund->rbuf_a    = rbuf_a;
2904 
2905     redund->Destroy = C->ops->destroy;
2906     C->ops->destroy = MatDestroy_MatRedundant;
2907   }
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 #undef __FUNCT__
2912 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2913 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,PetscInt mlocal_sub,MatReuse reuse,Mat *matredundant)
2914 {
2915   PetscErrorCode ierr;
2916   PetscMPIInt    rank,size;
2917   MPI_Comm       comm;
2918   PetscInt       nsends    = 0,nrecvs=0,i,rownz_max=0;
2919   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL;
2920   PetscInt       *rowrange = mat->rmap->range;
2921   Mat_MPIAIJ     *aij      = (Mat_MPIAIJ*)mat->data;
2922   Mat            A         = aij->A,B=aij->B,C=*matredundant;
2923   Mat_SeqAIJ     *a        = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2924   PetscScalar    *sbuf_a;
2925   PetscInt       nzlocal=a->nz+b->nz;
2926   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2927   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray,M,N;
2928   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2929   MatScalar      *aworkA,*aworkB;
2930   PetscScalar    *vals;
2931   PetscMPIInt    tag1,tag2,tag3,imdex;
2932   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2933   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2934   MPI_Status     recv_status,*send_status;
2935   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2936   PetscInt       **rbuf_j=NULL;
2937   PetscScalar    **rbuf_a=NULL;
2938   Mat_Redundant  *redund =NULL;
2939   PetscContainer container;
2940 
2941   PetscFunctionBegin;
2942   /* Only MatGetRedundantMatrix_MPIAIJ_interlaced() is written now */
2943   /*
2944   ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,psubcomm,reuse,matredundant);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946    */
2947 
2948   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2949   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2951 
2952   if (reuse == MAT_REUSE_MATRIX) {
2953     ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
2954     if (M != N || M != mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2955     ierr = MatGetLocalSize(C,&M,&N);CHKERRQ(ierr);
2956     if (M != N || M != mlocal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong local size");
2957     ierr = PetscObjectQuery((PetscObject)C,"Mat_Redundant",(PetscObject*)&container);CHKERRQ(ierr);
2958     if (!container) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Container does not exit");
2959     ierr = PetscContainerGetPointer(container,(void**)&redund);CHKERRQ(ierr);
2960     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2961 
2962     nsends    = redund->nsends;
2963     nrecvs    = redund->nrecvs;
2964     send_rank = redund->send_rank;
2965     recv_rank = redund->recv_rank;
2966     sbuf_nz   = redund->sbuf_nz;
2967     rbuf_nz   = redund->rbuf_nz;
2968     sbuf_j    = redund->sbuf_j;
2969     sbuf_a    = redund->sbuf_a;
2970     rbuf_j    = redund->rbuf_j;
2971     rbuf_a    = redund->rbuf_a;
2972   }
2973 
2974   if (reuse == MAT_INITIAL_MATRIX) {
2975     PetscMPIInt subrank,subsize;
2976     PetscInt    nleftover,np_subcomm;
2977     /* get the destination processors' id send_rank, nsends and nrecvs */
2978     ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2979     ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2980     ierr = PetscMalloc2(size,PetscMPIInt,&send_rank,size,PetscMPIInt,&recv_rank);CHKERRQ(ierr);
2981 
2982     np_subcomm = size/nsubcomm;
2983     nleftover  = size - nsubcomm*np_subcomm;
2984 
2985     nsends = 0; nrecvs = 0;
2986     for (i=0; i<size; i++) { /* i=rank*/
2987       if (subrank == i/nsubcomm && rank != i) { /* my_subrank == other's subrank */
2988         send_rank[nsends]   = i; nsends++;
2989         recv_rank[nrecvs++] = i;
2990       }
2991     }
2992     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2993       i = size-nleftover-1;
2994       j = 0;
2995       while (j < nsubcomm - nleftover) {
2996         send_rank[nsends++] = i;
2997         i--; j++;
2998       }
2999     }
3000 
3001     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
3002       for (i=0; i<nleftover; i++) {
3003         recv_rank[nrecvs++] = size-nleftover+i;
3004       }
3005     }
3006 
3007     /* allocate sbuf_j, sbuf_a */
3008     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
3009     ierr = PetscMalloc(i*sizeof(PetscInt),&sbuf_j);CHKERRQ(ierr);
3010     ierr = PetscMalloc((nzlocal+1)*sizeof(PetscScalar),&sbuf_a);CHKERRQ(ierr);
3011   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3012 
3013   /* copy mat's local entries into the buffers */
3014   if (reuse == MAT_INITIAL_MATRIX) {
3015     rownz_max = 0;
3016     rptr      = sbuf_j;
3017     cols      = sbuf_j + rend-rstart + 1;
3018     vals      = sbuf_a;
3019     rptr[0]   = 0;
3020     for (i=0; i<rend-rstart; i++) {
3021       row    = i + rstart;
3022       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
3023       ncols  = nzA + nzB;
3024       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
3025       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
3026       /* load the column indices for this row into cols */
3027       lwrite = 0;
3028       for (l=0; l<nzB; l++) {
3029         if ((ctmp = bmap[cworkB[l]]) < cstart) {
3030           vals[lwrite]   = aworkB[l];
3031           cols[lwrite++] = ctmp;
3032         }
3033       }
3034       for (l=0; l<nzA; l++) {
3035         vals[lwrite]   = aworkA[l];
3036         cols[lwrite++] = cstart + cworkA[l];
3037       }
3038       for (l=0; l<nzB; l++) {
3039         if ((ctmp = bmap[cworkB[l]]) >= cend) {
3040           vals[lwrite]   = aworkB[l];
3041           cols[lwrite++] = ctmp;
3042         }
3043       }
3044       vals     += ncols;
3045       cols     += ncols;
3046       rptr[i+1] = rptr[i] + ncols;
3047       if (rownz_max < ncols) rownz_max = ncols;
3048     }
3049     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
3050   } else { /* only copy matrix values into sbuf_a */
3051     rptr    = sbuf_j;
3052     vals    = sbuf_a;
3053     rptr[0] = 0;
3054     for (i=0; i<rend-rstart; i++) {
3055       row    = i + rstart;
3056       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
3057       ncols  = nzA + nzB;
3058       cworkB = b->j + b->i[i];
3059       aworkA = a->a + a->i[i];
3060       aworkB = b->a + b->i[i];
3061       lwrite = 0;
3062       for (l=0; l<nzB; l++) {
3063         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
3064       }
3065       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
3066       for (l=0; l<nzB; l++) {
3067         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
3068       }
3069       vals     += ncols;
3070       rptr[i+1] = rptr[i] + ncols;
3071     }
3072   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3073 
3074   /* send nzlocal to others, and recv other's nzlocal */
3075   /*--------------------------------------------------*/
3076   if (reuse == MAT_INITIAL_MATRIX) {
3077     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
3078 
3079     s_waits2 = s_waits3 + nsends;
3080     s_waits1 = s_waits2 + nsends;
3081     r_waits1 = s_waits1 + nsends;
3082     r_waits2 = r_waits1 + nrecvs;
3083     r_waits3 = r_waits2 + nrecvs;
3084   } else {
3085     ierr = PetscMalloc2(nsends + nrecvs +1,MPI_Request,&s_waits3,nsends+1,MPI_Status,&send_status);CHKERRQ(ierr);
3086 
3087     r_waits3 = s_waits3 + nsends;
3088   }
3089 
3090   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
3091   if (reuse == MAT_INITIAL_MATRIX) {
3092     /* get new tags to keep the communication clean */
3093     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
3094     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
3095     ierr = PetscMalloc4(nsends,PetscInt,&sbuf_nz,nrecvs,PetscInt,&rbuf_nz,nrecvs,PetscInt*,&rbuf_j,nrecvs,PetscScalar*,&rbuf_a);CHKERRQ(ierr);
3096 
3097     /* post receives of other's nzlocal */
3098     for (i=0; i<nrecvs; i++) {
3099       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
3100     }
3101     /* send nzlocal to others */
3102     for (i=0; i<nsends; i++) {
3103       sbuf_nz[i] = nzlocal;
3104       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
3105     }
3106     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
3107     count = nrecvs;
3108     while (count) {
3109       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
3110 
3111       recv_rank[imdex] = recv_status.MPI_SOURCE;
3112       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
3113       ierr = PetscMalloc((rbuf_nz[imdex]+1)*sizeof(PetscScalar),&rbuf_a[imdex]);CHKERRQ(ierr);
3114 
3115       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
3116 
3117       rbuf_nz[imdex] += i + 2;
3118 
3119       ierr = PetscMalloc(rbuf_nz[imdex]*sizeof(PetscInt),&rbuf_j[imdex]);CHKERRQ(ierr);
3120       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
3121       count--;
3122     }
3123     /* wait on sends of nzlocal */
3124     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
3125     /* send mat->i,j to others, and recv from other's */
3126     /*------------------------------------------------*/
3127     for (i=0; i<nsends; i++) {
3128       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
3129       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
3130     }
3131     /* wait on receives of mat->i,j */
3132     /*------------------------------*/
3133     count = nrecvs;
3134     while (count) {
3135       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
3136       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3137       count--;
3138     }
3139     /* wait on sends of mat->i,j */
3140     /*---------------------------*/
3141     if (nsends) {
3142       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
3143     }
3144   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
3145 
3146   /* post receives, send and receive mat->a */
3147   /*----------------------------------------*/
3148   for (imdex=0; imdex<nrecvs; imdex++) {
3149     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
3150   }
3151   for (i=0; i<nsends; i++) {
3152     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
3153   }
3154   count = nrecvs;
3155   while (count) {
3156     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
3157     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
3158     count--;
3159   }
3160   if (nsends) {
3161     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
3162   }
3163 
3164   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
3165 
3166   /* create redundant matrix */
3167   /*-------------------------*/
3168   if (reuse == MAT_INITIAL_MATRIX) {
3169     /* compute rownz_max for preallocation */
3170     for (imdex=0; imdex<nrecvs; imdex++) {
3171       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
3172       rptr = rbuf_j[imdex];
3173       for (i=0; i<j; i++) {
3174         ncols = rptr[i+1] - rptr[i];
3175         if (rownz_max < ncols) rownz_max = ncols;
3176       }
3177     }
3178 
3179     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
3180     ierr = MatSetSizes(C,mlocal_sub,mlocal_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
3181     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
3182     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
3183     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
3184     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
3185   } else {
3186     C = *matredundant;
3187   }
3188 
3189   /* insert local matrix entries */
3190   rptr = sbuf_j;
3191   cols = sbuf_j + rend-rstart + 1;
3192   vals = sbuf_a;
3193   for (i=0; i<rend-rstart; i++) {
3194     row   = i + rstart;
3195     ncols = rptr[i+1] - rptr[i];
3196     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3197     vals += ncols;
3198     cols += ncols;
3199   }
3200   /* insert received matrix entries */
3201   for (imdex=0; imdex<nrecvs; imdex++) {
3202     rstart = rowrange[recv_rank[imdex]];
3203     rend   = rowrange[recv_rank[imdex]+1];
3204     rptr   = rbuf_j[imdex];
3205     cols   = rbuf_j[imdex] + rend-rstart + 1;
3206     vals   = rbuf_a[imdex];
3207     for (i=0; i<rend-rstart; i++) {
3208       row   = i + rstart;
3209       ncols = rptr[i+1] - rptr[i];
3210       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
3211       vals += ncols;
3212       cols += ncols;
3213     }
3214   }
3215   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   ierr = MatGetSize(C,&M,&N);CHKERRQ(ierr);
3218   if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"redundant mat size %d != input mat size %d",M,mat->rmap->N);
3219   if (reuse == MAT_INITIAL_MATRIX) {
3220     PetscContainer container;
3221     *matredundant = C;
3222     /* create a supporting struct and attach it to C for reuse */
3223     ierr = PetscNewLog(C,Mat_Redundant,&redund);CHKERRQ(ierr);
3224     ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
3225     ierr = PetscContainerSetPointer(container,redund);CHKERRQ(ierr);
3226     ierr = PetscContainerSetUserDestroy(container,PetscContainerDestroy_MatRedundant);CHKERRQ(ierr);
3227     ierr = PetscObjectCompose((PetscObject)C,"Mat_Redundant",(PetscObject)container);CHKERRQ(ierr);
3228     ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
3229 
3230     redund->nzlocal   = nzlocal;
3231     redund->nsends    = nsends;
3232     redund->nrecvs    = nrecvs;
3233     redund->send_rank = send_rank;
3234     redund->recv_rank = recv_rank;
3235     redund->sbuf_nz   = sbuf_nz;
3236     redund->rbuf_nz   = rbuf_nz;
3237     redund->sbuf_j    = sbuf_j;
3238     redund->sbuf_a    = sbuf_a;
3239     redund->rbuf_j    = rbuf_j;
3240     redund->rbuf_a    = rbuf_a;
3241 
3242     redund->Destroy = C->ops->destroy;
3243     C->ops->destroy = MatDestroy_MatRedundant;
3244   }
3245   PetscFunctionReturn(0);
3246 }
3247 
3248 #undef __FUNCT__
3249 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
3250 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3251 {
3252   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3253   PetscErrorCode ierr;
3254   PetscInt       i,*idxb = 0;
3255   PetscScalar    *va,*vb;
3256   Vec            vtmp;
3257 
3258   PetscFunctionBegin;
3259   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
3260   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3261   if (idx) {
3262     for (i=0; i<A->rmap->n; i++) {
3263       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3264     }
3265   }
3266 
3267   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3268   if (idx) {
3269     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3270   }
3271   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3272   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3273 
3274   for (i=0; i<A->rmap->n; i++) {
3275     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
3276       va[i] = vb[i];
3277       if (idx) idx[i] = a->garray[idxb[i]];
3278     }
3279   }
3280 
3281   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3282   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3283   ierr = PetscFree(idxb);CHKERRQ(ierr);
3284   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3285   PetscFunctionReturn(0);
3286 }
3287 
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
3290 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3291 {
3292   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
3293   PetscErrorCode ierr;
3294   PetscInt       i,*idxb = 0;
3295   PetscScalar    *va,*vb;
3296   Vec            vtmp;
3297 
3298   PetscFunctionBegin;
3299   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
3300   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
3301   if (idx) {
3302     for (i=0; i<A->cmap->n; i++) {
3303       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
3304     }
3305   }
3306 
3307   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
3308   if (idx) {
3309     ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);
3310   }
3311   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
3312   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
3313 
3314   for (i=0; i<A->rmap->n; i++) {
3315     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
3316       va[i] = vb[i];
3317       if (idx) idx[i] = a->garray[idxb[i]];
3318     }
3319   }
3320 
3321   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
3322   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
3323   ierr = PetscFree(idxb);CHKERRQ(ierr);
3324   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
3325   PetscFunctionReturn(0);
3326 }
3327 
3328 #undef __FUNCT__
3329 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
3330 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3331 {
3332   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3333   PetscInt       n      = A->rmap->n;
3334   PetscInt       cstart = A->cmap->rstart;
3335   PetscInt       *cmap  = mat->garray;
3336   PetscInt       *diagIdx, *offdiagIdx;
3337   Vec            diagV, offdiagV;
3338   PetscScalar    *a, *diagA, *offdiagA;
3339   PetscInt       r;
3340   PetscErrorCode ierr;
3341 
3342   PetscFunctionBegin;
3343   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3344   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3345   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3346   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3347   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3348   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3349   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3350   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3351   for (r = 0; r < n; ++r) {
3352     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3353       a[r]   = diagA[r];
3354       idx[r] = cstart + diagIdx[r];
3355     } else {
3356       a[r]   = offdiagA[r];
3357       idx[r] = cmap[offdiagIdx[r]];
3358     }
3359   }
3360   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3361   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3362   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3363   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3364   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3365   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 #undef __FUNCT__
3370 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3371 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3372 {
3373   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3374   PetscInt       n      = A->rmap->n;
3375   PetscInt       cstart = A->cmap->rstart;
3376   PetscInt       *cmap  = mat->garray;
3377   PetscInt       *diagIdx, *offdiagIdx;
3378   Vec            diagV, offdiagV;
3379   PetscScalar    *a, *diagA, *offdiagA;
3380   PetscInt       r;
3381   PetscErrorCode ierr;
3382 
3383   PetscFunctionBegin;
3384   ierr = PetscMalloc2(n,PetscInt,&diagIdx,n,PetscInt,&offdiagIdx);CHKERRQ(ierr);
3385   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3386   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3387   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3388   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3389   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3390   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3391   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3392   for (r = 0; r < n; ++r) {
3393     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3394       a[r]   = diagA[r];
3395       idx[r] = cstart + diagIdx[r];
3396     } else {
3397       a[r]   = offdiagA[r];
3398       idx[r] = cmap[offdiagIdx[r]];
3399     }
3400   }
3401   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3402   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3403   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3404   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3405   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3406   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3412 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3413 {
3414   PetscErrorCode ierr;
3415   Mat            *dummy;
3416 
3417   PetscFunctionBegin;
3418   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3419   *newmat = *dummy;
3420   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 
3424 extern PetscErrorCode  MatFDColoringApply_AIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
3425 
3426 #undef __FUNCT__
3427 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3428 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3429 {
3430   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3431   PetscErrorCode ierr;
3432 
3433   PetscFunctionBegin;
3434   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3435   PetscFunctionReturn(0);
3436 }
3437 
3438 #undef __FUNCT__
3439 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3440 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3441 {
3442   PetscErrorCode ierr;
3443   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3444 
3445   PetscFunctionBegin;
3446   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3447   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3448   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3449   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 /* -------------------------------------------------------------------*/
3454 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3455                                        MatGetRow_MPIAIJ,
3456                                        MatRestoreRow_MPIAIJ,
3457                                        MatMult_MPIAIJ,
3458                                 /* 4*/ MatMultAdd_MPIAIJ,
3459                                        MatMultTranspose_MPIAIJ,
3460                                        MatMultTransposeAdd_MPIAIJ,
3461 #if defined(PETSC_HAVE_PBGL)
3462                                        MatSolve_MPIAIJ,
3463 #else
3464                                        0,
3465 #endif
3466                                        0,
3467                                        0,
3468                                 /*10*/ 0,
3469                                        0,
3470                                        0,
3471                                        MatSOR_MPIAIJ,
3472                                        MatTranspose_MPIAIJ,
3473                                 /*15*/ MatGetInfo_MPIAIJ,
3474                                        MatEqual_MPIAIJ,
3475                                        MatGetDiagonal_MPIAIJ,
3476                                        MatDiagonalScale_MPIAIJ,
3477                                        MatNorm_MPIAIJ,
3478                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3479                                        MatAssemblyEnd_MPIAIJ,
3480                                        MatSetOption_MPIAIJ,
3481                                        MatZeroEntries_MPIAIJ,
3482                                 /*24*/ MatZeroRows_MPIAIJ,
3483                                        0,
3484 #if defined(PETSC_HAVE_PBGL)
3485                                        0,
3486 #else
3487                                        0,
3488 #endif
3489                                        0,
3490                                        0,
3491                                 /*29*/ MatSetUp_MPIAIJ,
3492 #if defined(PETSC_HAVE_PBGL)
3493                                        0,
3494 #else
3495                                        0,
3496 #endif
3497                                        0,
3498                                        0,
3499                                        0,
3500                                 /*34*/ MatDuplicate_MPIAIJ,
3501                                        0,
3502                                        0,
3503                                        0,
3504                                        0,
3505                                 /*39*/ MatAXPY_MPIAIJ,
3506                                        MatGetSubMatrices_MPIAIJ,
3507                                        MatIncreaseOverlap_MPIAIJ,
3508                                        MatGetValues_MPIAIJ,
3509                                        MatCopy_MPIAIJ,
3510                                 /*44*/ MatGetRowMax_MPIAIJ,
3511                                        MatScale_MPIAIJ,
3512                                        0,
3513                                        0,
3514                                        MatZeroRowsColumns_MPIAIJ,
3515                                 /*49*/ MatSetRandom_MPIAIJ,
3516                                        0,
3517                                        0,
3518                                        0,
3519                                        0,
3520                                 /*54*/ MatFDColoringCreate_MPIAIJ,
3521                                        0,
3522                                        MatSetUnfactored_MPIAIJ,
3523                                        MatPermute_MPIAIJ,
3524                                        0,
3525                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3526                                        MatDestroy_MPIAIJ,
3527                                        MatView_MPIAIJ,
3528                                        0,
3529                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3530                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3531                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3532                                        0,
3533                                        0,
3534                                        0,
3535                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3536                                        MatGetRowMinAbs_MPIAIJ,
3537                                        0,
3538                                        MatSetColoring_MPIAIJ,
3539                                        0,
3540                                        MatSetValuesAdifor_MPIAIJ,
3541                                 /*75*/ MatFDColoringApply_AIJ,
3542                                        0,
3543                                        0,
3544                                        0,
3545                                        MatFindZeroDiagonals_MPIAIJ,
3546                                 /*80*/ 0,
3547                                        0,
3548                                        0,
3549                                 /*83*/ MatLoad_MPIAIJ,
3550                                        0,
3551                                        0,
3552                                        0,
3553                                        0,
3554                                        0,
3555                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3556                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3557                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3558                                        MatPtAP_MPIAIJ_MPIAIJ,
3559                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3560                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3561                                        0,
3562                                        0,
3563                                        0,
3564                                        0,
3565                                 /*99*/ 0,
3566                                        0,
3567                                        0,
3568                                        MatConjugate_MPIAIJ,
3569                                        0,
3570                                 /*104*/MatSetValuesRow_MPIAIJ,
3571                                        MatRealPart_MPIAIJ,
3572                                        MatImaginaryPart_MPIAIJ,
3573                                        0,
3574                                        0,
3575                                 /*109*/0,
3576                                        MatGetRedundantMatrix_MPIAIJ,
3577                                        MatGetRowMin_MPIAIJ,
3578                                        0,
3579                                        0,
3580                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3581                                        0,
3582                                        0,
3583                                        0,
3584                                        0,
3585                                 /*119*/0,
3586                                        0,
3587                                        0,
3588                                        0,
3589                                        MatGetMultiProcBlock_MPIAIJ,
3590                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3591                                        MatGetColumnNorms_MPIAIJ,
3592                                        MatInvertBlockDiagonal_MPIAIJ,
3593                                        0,
3594                                        MatGetSubMatricesParallel_MPIAIJ,
3595                                 /*129*/0,
3596                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3597                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3598                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3599                                        0,
3600                                 /*134*/0,
3601                                        0,
3602                                        0,
3603                                        0,
3604                                        0,
3605                                 /*139*/0,
3606                                        0
3607 };
3608 
3609 /* ----------------------------------------------------------------------------------------*/
3610 
3611 #undef __FUNCT__
3612 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3613 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3614 {
3615   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3616   PetscErrorCode ierr;
3617 
3618   PetscFunctionBegin;
3619   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3620   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3621   PetscFunctionReturn(0);
3622 }
3623 
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3626 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3627 {
3628   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3629   PetscErrorCode ierr;
3630 
3631   PetscFunctionBegin;
3632   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3633   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3634   PetscFunctionReturn(0);
3635 }
3636 
3637 #undef __FUNCT__
3638 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3639 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3640 {
3641   Mat_MPIAIJ     *b;
3642   PetscErrorCode ierr;
3643 
3644   PetscFunctionBegin;
3645   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3646   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3647   b = (Mat_MPIAIJ*)B->data;
3648 
3649   if (!B->preallocated) {
3650     /* Explicitly create 2 MATSEQAIJ matrices. */
3651     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3652     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3653     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3654     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3655     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3656     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3657     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3658     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3659     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3660     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3661   }
3662 
3663   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3664   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3665   B->preallocated = PETSC_TRUE;
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 #undef __FUNCT__
3670 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3671 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3672 {
3673   Mat            mat;
3674   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3675   PetscErrorCode ierr;
3676 
3677   PetscFunctionBegin;
3678   *newmat = 0;
3679   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3680   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3681   ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
3682   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3683   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3684   a       = (Mat_MPIAIJ*)mat->data;
3685 
3686   mat->factortype   = matin->factortype;
3687   mat->rmap->bs     = matin->rmap->bs;
3688   mat->cmap->bs     = matin->cmap->bs;
3689   mat->assembled    = PETSC_TRUE;
3690   mat->insertmode   = NOT_SET_VALUES;
3691   mat->preallocated = PETSC_TRUE;
3692 
3693   a->size         = oldmat->size;
3694   a->rank         = oldmat->rank;
3695   a->donotstash   = oldmat->donotstash;
3696   a->roworiented  = oldmat->roworiented;
3697   a->rowindices   = 0;
3698   a->rowvalues    = 0;
3699   a->getrowactive = PETSC_FALSE;
3700 
3701   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3702   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3703 
3704   if (oldmat->colmap) {
3705 #if defined(PETSC_USE_CTABLE)
3706     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3707 #else
3708     ierr = PetscMalloc((mat->cmap->N)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3709     ierr = PetscLogObjectMemory(mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3710     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3711 #endif
3712   } else a->colmap = 0;
3713   if (oldmat->garray) {
3714     PetscInt len;
3715     len  = oldmat->B->cmap->n;
3716     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3717     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3718     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3719   } else a->garray = 0;
3720 
3721   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3722   ierr    = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3723   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3724   ierr    = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3725   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3726   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3727   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3728   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3729   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3730   *newmat = mat;
3731   PetscFunctionReturn(0);
3732 }
3733 
3734 
3735 
3736 #undef __FUNCT__
3737 #define __FUNCT__ "MatLoad_MPIAIJ"
3738 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3739 {
3740   PetscScalar    *vals,*svals;
3741   MPI_Comm       comm;
3742   PetscErrorCode ierr;
3743   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3744   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3745   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3746   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3747   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3748   int            fd;
3749   PetscInt       bs = 1;
3750 
3751   PetscFunctionBegin;
3752   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3753   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3754   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3755   if (!rank) {
3756     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3757     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3758     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3759   }
3760 
3761   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
3762   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3763   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3764 
3765   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3766 
3767   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3768   M    = header[1]; N = header[2];
3769   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3770   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3771   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3772 
3773   /* If global sizes are set, check if they are consistent with that given in the file */
3774   if (sizesset) {
3775     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3776   }
3777   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3778   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3779 
3780   /* determine ownership of all (block) rows */
3781   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3782   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3783   else m = newMat->rmap->n; /* Set by user */
3784 
3785   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&rowners);CHKERRQ(ierr);
3786   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3787 
3788   /* First process needs enough room for process with most rows */
3789   if (!rank) {
3790     mmax = rowners[1];
3791     for (i=2; i<=size; i++) {
3792       mmax = PetscMax(mmax, rowners[i]);
3793     }
3794   } else mmax = -1;             /* unused, but compilers complain */
3795 
3796   rowners[0] = 0;
3797   for (i=2; i<=size; i++) {
3798     rowners[i] += rowners[i-1];
3799   }
3800   rstart = rowners[rank];
3801   rend   = rowners[rank+1];
3802 
3803   /* distribute row lengths to all processors */
3804   ierr = PetscMalloc2(m,PetscInt,&ourlens,m,PetscInt,&offlens);CHKERRQ(ierr);
3805   if (!rank) {
3806     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3807     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3808     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3809     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3810     for (j=0; j<m; j++) {
3811       procsnz[0] += ourlens[j];
3812     }
3813     for (i=1; i<size; i++) {
3814       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3815       /* calculate the number of nonzeros on each processor */
3816       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3817         procsnz[i] += rowlengths[j];
3818       }
3819       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3820     }
3821     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3822   } else {
3823     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3824   }
3825 
3826   if (!rank) {
3827     /* determine max buffer needed and allocate it */
3828     maxnz = 0;
3829     for (i=0; i<size; i++) {
3830       maxnz = PetscMax(maxnz,procsnz[i]);
3831     }
3832     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3833 
3834     /* read in my part of the matrix column indices  */
3835     nz   = procsnz[0];
3836     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3837     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3838 
3839     /* read in every one elses and ship off */
3840     for (i=1; i<size; i++) {
3841       nz   = procsnz[i];
3842       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3843       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3844     }
3845     ierr = PetscFree(cols);CHKERRQ(ierr);
3846   } else {
3847     /* determine buffer space needed for message */
3848     nz = 0;
3849     for (i=0; i<m; i++) {
3850       nz += ourlens[i];
3851     }
3852     ierr = PetscMalloc(nz*sizeof(PetscInt),&mycols);CHKERRQ(ierr);
3853 
3854     /* receive message of column indices*/
3855     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3856   }
3857 
3858   /* determine column ownership if matrix is not square */
3859   if (N != M) {
3860     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3861     else n = newMat->cmap->n;
3862     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3863     cstart = cend - n;
3864   } else {
3865     cstart = rstart;
3866     cend   = rend;
3867     n      = cend - cstart;
3868   }
3869 
3870   /* loop over local rows, determining number of off diagonal entries */
3871   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3872   jj   = 0;
3873   for (i=0; i<m; i++) {
3874     for (j=0; j<ourlens[i]; j++) {
3875       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3876       jj++;
3877     }
3878   }
3879 
3880   for (i=0; i<m; i++) {
3881     ourlens[i] -= offlens[i];
3882   }
3883   if (!sizesset) {
3884     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3885   }
3886 
3887   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3888 
3889   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3890 
3891   for (i=0; i<m; i++) {
3892     ourlens[i] += offlens[i];
3893   }
3894 
3895   if (!rank) {
3896     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3897 
3898     /* read in my part of the matrix numerical values  */
3899     nz   = procsnz[0];
3900     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3901 
3902     /* insert into matrix */
3903     jj      = rstart;
3904     smycols = mycols;
3905     svals   = vals;
3906     for (i=0; i<m; i++) {
3907       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3908       smycols += ourlens[i];
3909       svals   += ourlens[i];
3910       jj++;
3911     }
3912 
3913     /* read in other processors and ship out */
3914     for (i=1; i<size; i++) {
3915       nz   = procsnz[i];
3916       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3917       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3918     }
3919     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3920   } else {
3921     /* receive numeric values */
3922     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&vals);CHKERRQ(ierr);
3923 
3924     /* receive message of values*/
3925     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3926 
3927     /* insert into matrix */
3928     jj      = rstart;
3929     smycols = mycols;
3930     svals   = vals;
3931     for (i=0; i<m; i++) {
3932       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3933       smycols += ourlens[i];
3934       svals   += ourlens[i];
3935       jj++;
3936     }
3937   }
3938   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3939   ierr = PetscFree(vals);CHKERRQ(ierr);
3940   ierr = PetscFree(mycols);CHKERRQ(ierr);
3941   ierr = PetscFree(rowners);CHKERRQ(ierr);
3942   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3943   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3944   PetscFunctionReturn(0);
3945 }
3946 
3947 #undef __FUNCT__
3948 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3949 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3950 {
3951   PetscErrorCode ierr;
3952   IS             iscol_local;
3953   PetscInt       csize;
3954 
3955   PetscFunctionBegin;
3956   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3957   if (call == MAT_REUSE_MATRIX) {
3958     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3959     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3960   } else {
3961     PetscInt cbs;
3962     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3963     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3964     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3965   }
3966   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3967   if (call == MAT_INITIAL_MATRIX) {
3968     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3969     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3970   }
3971   PetscFunctionReturn(0);
3972 }
3973 
3974 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3975 #undef __FUNCT__
3976 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3977 /*
3978     Not great since it makes two copies of the submatrix, first an SeqAIJ
3979   in local and then by concatenating the local matrices the end result.
3980   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3981 
3982   Note: This requires a sequential iscol with all indices.
3983 */
3984 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3985 {
3986   PetscErrorCode ierr;
3987   PetscMPIInt    rank,size;
3988   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3989   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3990   PetscBool      allcolumns, colflag;
3991   Mat            M,Mreuse;
3992   MatScalar      *vwork,*aa;
3993   MPI_Comm       comm;
3994   Mat_SeqAIJ     *aij;
3995 
3996   PetscFunctionBegin;
3997   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3998   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4000 
4001   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
4002   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
4003   if (colflag && ncol == mat->cmap->N) {
4004     allcolumns = PETSC_TRUE;
4005   } else {
4006     allcolumns = PETSC_FALSE;
4007   }
4008   if (call ==  MAT_REUSE_MATRIX) {
4009     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
4010     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
4011     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
4012   } else {
4013     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
4014   }
4015 
4016   /*
4017       m - number of local rows
4018       n - number of columns (same on all processors)
4019       rstart - first row in new global matrix generated
4020   */
4021   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
4022   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
4023   if (call == MAT_INITIAL_MATRIX) {
4024     aij = (Mat_SeqAIJ*)(Mreuse)->data;
4025     ii  = aij->i;
4026     jj  = aij->j;
4027 
4028     /*
4029         Determine the number of non-zeros in the diagonal and off-diagonal
4030         portions of the matrix in order to do correct preallocation
4031     */
4032 
4033     /* first get start and end of "diagonal" columns */
4034     if (csize == PETSC_DECIDE) {
4035       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
4036       if (mglobal == n) { /* square matrix */
4037         nlocal = m;
4038       } else {
4039         nlocal = n/size + ((n % size) > rank);
4040       }
4041     } else {
4042       nlocal = csize;
4043     }
4044     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4045     rstart = rend - nlocal;
4046     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
4047 
4048     /* next, compute all the lengths */
4049     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
4050     olens = dlens + m;
4051     for (i=0; i<m; i++) {
4052       jend = ii[i+1] - ii[i];
4053       olen = 0;
4054       dlen = 0;
4055       for (j=0; j<jend; j++) {
4056         if (*jj < rstart || *jj >= rend) olen++;
4057         else dlen++;
4058         jj++;
4059       }
4060       olens[i] = olen;
4061       dlens[i] = dlen;
4062     }
4063     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
4064     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
4065     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
4066     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
4067     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
4068     ierr = PetscFree(dlens);CHKERRQ(ierr);
4069   } else {
4070     PetscInt ml,nl;
4071 
4072     M    = *newmat;
4073     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
4074     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
4075     ierr = MatZeroEntries(M);CHKERRQ(ierr);
4076     /*
4077          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
4078        rather than the slower MatSetValues().
4079     */
4080     M->was_assembled = PETSC_TRUE;
4081     M->assembled     = PETSC_FALSE;
4082   }
4083   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
4084   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
4085   ii   = aij->i;
4086   jj   = aij->j;
4087   aa   = aij->a;
4088   for (i=0; i<m; i++) {
4089     row   = rstart + i;
4090     nz    = ii[i+1] - ii[i];
4091     cwork = jj;     jj += nz;
4092     vwork = aa;     aa += nz;
4093     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
4094   }
4095 
4096   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4097   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4098   *newmat = M;
4099 
4100   /* save submatrix used in processor for next request */
4101   if (call ==  MAT_INITIAL_MATRIX) {
4102     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
4103     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
4104   }
4105   PetscFunctionReturn(0);
4106 }
4107 
4108 #undef __FUNCT__
4109 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
4110 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4111 {
4112   PetscInt       m,cstart, cend,j,nnz,i,d;
4113   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
4114   const PetscInt *JJ;
4115   PetscScalar    *values;
4116   PetscErrorCode ierr;
4117 
4118   PetscFunctionBegin;
4119   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
4120 
4121   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
4122   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
4123   m      = B->rmap->n;
4124   cstart = B->cmap->rstart;
4125   cend   = B->cmap->rend;
4126   rstart = B->rmap->rstart;
4127 
4128   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
4129 
4130 #if defined(PETSC_USE_DEBUGGING)
4131   for (i=0; i<m; i++) {
4132     nnz = Ii[i+1]- Ii[i];
4133     JJ  = J + Ii[i];
4134     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
4135     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
4136     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
4137   }
4138 #endif
4139 
4140   for (i=0; i<m; i++) {
4141     nnz     = Ii[i+1]- Ii[i];
4142     JJ      = J + Ii[i];
4143     nnz_max = PetscMax(nnz_max,nnz);
4144     d       = 0;
4145     for (j=0; j<nnz; j++) {
4146       if (cstart <= JJ[j] && JJ[j] < cend) d++;
4147     }
4148     d_nnz[i] = d;
4149     o_nnz[i] = nnz - d;
4150   }
4151   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
4152   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
4153 
4154   if (v) values = (PetscScalar*)v;
4155   else {
4156     ierr = PetscMalloc((nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
4157     ierr = PetscMemzero(values,nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
4158   }
4159 
4160   for (i=0; i<m; i++) {
4161     ii   = i + rstart;
4162     nnz  = Ii[i+1]- Ii[i];
4163     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
4164   }
4165   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4166   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4167 
4168   if (!v) {
4169     ierr = PetscFree(values);CHKERRQ(ierr);
4170   }
4171   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
4172   PetscFunctionReturn(0);
4173 }
4174 
4175 #undef __FUNCT__
4176 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
4177 /*@
4178    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
4179    (the default parallel PETSc format).
4180 
4181    Collective on MPI_Comm
4182 
4183    Input Parameters:
4184 +  B - the matrix
4185 .  i - the indices into j for the start of each local row (starts with zero)
4186 .  j - the column indices for each local row (starts with zero)
4187 -  v - optional values in the matrix
4188 
4189    Level: developer
4190 
4191    Notes:
4192        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4193      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4194      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4195 
4196        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4197 
4198        The format which is used for the sparse matrix input, is equivalent to a
4199     row-major ordering.. i.e for the following matrix, the input data expected is
4200     as shown:
4201 
4202         1 0 0
4203         2 0 3     P0
4204        -------
4205         4 5 6     P1
4206 
4207      Process0 [P0]: rows_owned=[0,1]
4208         i =  {0,1,3}  [size = nrow+1  = 2+1]
4209         j =  {0,0,2}  [size = nz = 6]
4210         v =  {1,2,3}  [size = nz = 6]
4211 
4212      Process1 [P1]: rows_owned=[2]
4213         i =  {0,3}    [size = nrow+1  = 1+1]
4214         j =  {0,1,2}  [size = nz = 6]
4215         v =  {4,5,6}  [size = nz = 6]
4216 
4217 .keywords: matrix, aij, compressed row, sparse, parallel
4218 
4219 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
4220           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
4221 @*/
4222 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
4223 {
4224   PetscErrorCode ierr;
4225 
4226   PetscFunctionBegin;
4227   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
4228   PetscFunctionReturn(0);
4229 }
4230 
4231 #undef __FUNCT__
4232 #define __FUNCT__ "MatMPIAIJSetPreallocation"
4233 /*@C
4234    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
4235    (the default parallel PETSc format).  For good matrix assembly performance
4236    the user should preallocate the matrix storage by setting the parameters
4237    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4238    performance can be increased by more than a factor of 50.
4239 
4240    Collective on MPI_Comm
4241 
4242    Input Parameters:
4243 +  A - the matrix
4244 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4245            (same value is used for all local rows)
4246 .  d_nnz - array containing the number of nonzeros in the various rows of the
4247            DIAGONAL portion of the local submatrix (possibly different for each row)
4248            or NULL, if d_nz is used to specify the nonzero structure.
4249            The size of this array is equal to the number of local rows, i.e 'm'.
4250            For matrices that will be factored, you must leave room for (and set)
4251            the diagonal entry even if it is zero.
4252 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4253            submatrix (same value is used for all local rows).
4254 -  o_nnz - array containing the number of nonzeros in the various rows of the
4255            OFF-DIAGONAL portion of the local submatrix (possibly different for
4256            each row) or NULL, if o_nz is used to specify the nonzero
4257            structure. The size of this array is equal to the number
4258            of local rows, i.e 'm'.
4259 
4260    If the *_nnz parameter is given then the *_nz parameter is ignored
4261 
4262    The AIJ format (also called the Yale sparse matrix format or
4263    compressed row storage (CSR)), is fully compatible with standard Fortran 77
4264    storage.  The stored row and column indices begin with zero.
4265    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
4266 
4267    The parallel matrix is partitioned such that the first m0 rows belong to
4268    process 0, the next m1 rows belong to process 1, the next m2 rows belong
4269    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4270 
4271    The DIAGONAL portion of the local submatrix of a processor can be defined
4272    as the submatrix which is obtained by extraction the part corresponding to
4273    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
4274    first row that belongs to the processor, r2 is the last row belonging to
4275    the this processor, and c1-c2 is range of indices of the local part of a
4276    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
4277    common case of a square matrix, the row and column ranges are the same and
4278    the DIAGONAL part is also square. The remaining portion of the local
4279    submatrix (mxN) constitute the OFF-DIAGONAL portion.
4280 
4281    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4282 
4283    You can call MatGetInfo() to get information on how effective the preallocation was;
4284    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4285    You can also run with the option -info and look for messages with the string
4286    malloc in them to see if additional memory allocation was needed.
4287 
4288    Example usage:
4289 
4290    Consider the following 8x8 matrix with 34 non-zero values, that is
4291    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4292    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4293    as follows:
4294 
4295 .vb
4296             1  2  0  |  0  3  0  |  0  4
4297     Proc0   0  5  6  |  7  0  0  |  8  0
4298             9  0 10  | 11  0  0  | 12  0
4299     -------------------------------------
4300            13  0 14  | 15 16 17  |  0  0
4301     Proc1   0 18  0  | 19 20 21  |  0  0
4302             0  0  0  | 22 23  0  | 24  0
4303     -------------------------------------
4304     Proc2  25 26 27  |  0  0 28  | 29  0
4305            30  0  0  | 31 32 33  |  0 34
4306 .ve
4307 
4308    This can be represented as a collection of submatrices as:
4309 
4310 .vb
4311       A B C
4312       D E F
4313       G H I
4314 .ve
4315 
4316    Where the submatrices A,B,C are owned by proc0, D,E,F are
4317    owned by proc1, G,H,I are owned by proc2.
4318 
4319    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4320    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4321    The 'M','N' parameters are 8,8, and have the same values on all procs.
4322 
4323    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4324    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4325    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4326    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4327    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4328    matrix, ans [DF] as another SeqAIJ matrix.
4329 
4330    When d_nz, o_nz parameters are specified, d_nz storage elements are
4331    allocated for every row of the local diagonal submatrix, and o_nz
4332    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4333    One way to choose d_nz and o_nz is to use the max nonzerors per local
4334    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4335    In this case, the values of d_nz,o_nz are:
4336 .vb
4337      proc0 : dnz = 2, o_nz = 2
4338      proc1 : dnz = 3, o_nz = 2
4339      proc2 : dnz = 1, o_nz = 4
4340 .ve
4341    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4342    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4343    for proc3. i.e we are using 12+15+10=37 storage locations to store
4344    34 values.
4345 
4346    When d_nnz, o_nnz parameters are specified, the storage is specified
4347    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4348    In the above case the values for d_nnz,o_nnz are:
4349 .vb
4350      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4351      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4352      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4353 .ve
4354    Here the space allocated is sum of all the above values i.e 34, and
4355    hence pre-allocation is perfect.
4356 
4357    Level: intermediate
4358 
4359 .keywords: matrix, aij, compressed row, sparse, parallel
4360 
4361 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4362           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4363 @*/
4364 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4365 {
4366   PetscErrorCode ierr;
4367 
4368   PetscFunctionBegin;
4369   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4370   PetscValidType(B,1);
4371   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4372   PetscFunctionReturn(0);
4373 }
4374 
4375 #undef __FUNCT__
4376 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4377 /*@
4378      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4379          CSR format the local rows.
4380 
4381    Collective on MPI_Comm
4382 
4383    Input Parameters:
4384 +  comm - MPI communicator
4385 .  m - number of local rows (Cannot be PETSC_DECIDE)
4386 .  n - This value should be the same as the local size used in creating the
4387        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4388        calculated if N is given) For square matrices n is almost always m.
4389 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4390 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4391 .   i - row indices
4392 .   j - column indices
4393 -   a - matrix values
4394 
4395    Output Parameter:
4396 .   mat - the matrix
4397 
4398    Level: intermediate
4399 
4400    Notes:
4401        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4402      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4403      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4404 
4405        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4406 
4407        The format which is used for the sparse matrix input, is equivalent to a
4408     row-major ordering.. i.e for the following matrix, the input data expected is
4409     as shown:
4410 
4411         1 0 0
4412         2 0 3     P0
4413        -------
4414         4 5 6     P1
4415 
4416      Process0 [P0]: rows_owned=[0,1]
4417         i =  {0,1,3}  [size = nrow+1  = 2+1]
4418         j =  {0,0,2}  [size = nz = 6]
4419         v =  {1,2,3}  [size = nz = 6]
4420 
4421      Process1 [P1]: rows_owned=[2]
4422         i =  {0,3}    [size = nrow+1  = 1+1]
4423         j =  {0,1,2}  [size = nz = 6]
4424         v =  {4,5,6}  [size = nz = 6]
4425 
4426 .keywords: matrix, aij, compressed row, sparse, parallel
4427 
4428 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4429           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4430 @*/
4431 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4432 {
4433   PetscErrorCode ierr;
4434 
4435   PetscFunctionBegin;
4436   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4437   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4438   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4439   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4440   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4441   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4442   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4443   PetscFunctionReturn(0);
4444 }
4445 
4446 #undef __FUNCT__
4447 #define __FUNCT__ "MatCreateAIJ"
4448 /*@C
4449    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4450    (the default parallel PETSc format).  For good matrix assembly performance
4451    the user should preallocate the matrix storage by setting the parameters
4452    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4453    performance can be increased by more than a factor of 50.
4454 
4455    Collective on MPI_Comm
4456 
4457    Input Parameters:
4458 +  comm - MPI communicator
4459 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4460            This value should be the same as the local size used in creating the
4461            y vector for the matrix-vector product y = Ax.
4462 .  n - This value should be the same as the local size used in creating the
4463        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4464        calculated if N is given) For square matrices n is almost always m.
4465 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4466 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4467 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4468            (same value is used for all local rows)
4469 .  d_nnz - array containing the number of nonzeros in the various rows of the
4470            DIAGONAL portion of the local submatrix (possibly different for each row)
4471            or NULL, if d_nz is used to specify the nonzero structure.
4472            The size of this array is equal to the number of local rows, i.e 'm'.
4473 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4474            submatrix (same value is used for all local rows).
4475 -  o_nnz - array containing the number of nonzeros in the various rows of the
4476            OFF-DIAGONAL portion of the local submatrix (possibly different for
4477            each row) or NULL, if o_nz is used to specify the nonzero
4478            structure. The size of this array is equal to the number
4479            of local rows, i.e 'm'.
4480 
4481    Output Parameter:
4482 .  A - the matrix
4483 
4484    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4485    MatXXXXSetPreallocation() paradgm instead of this routine directly.
4486    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4487 
4488    Notes:
4489    If the *_nnz parameter is given then the *_nz parameter is ignored
4490 
4491    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4492    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4493    storage requirements for this matrix.
4494 
4495    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4496    processor than it must be used on all processors that share the object for
4497    that argument.
4498 
4499    The user MUST specify either the local or global matrix dimensions
4500    (possibly both).
4501 
4502    The parallel matrix is partitioned across processors such that the
4503    first m0 rows belong to process 0, the next m1 rows belong to
4504    process 1, the next m2 rows belong to process 2 etc.. where
4505    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4506    values corresponding to [m x N] submatrix.
4507 
4508    The columns are logically partitioned with the n0 columns belonging
4509    to 0th partition, the next n1 columns belonging to the next
4510    partition etc.. where n0,n1,n2... are the the input parameter 'n'.
4511 
4512    The DIAGONAL portion of the local submatrix on any given processor
4513    is the submatrix corresponding to the rows and columns m,n
4514    corresponding to the given processor. i.e diagonal matrix on
4515    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4516    etc. The remaining portion of the local submatrix [m x (N-n)]
4517    constitute the OFF-DIAGONAL portion. The example below better
4518    illustrates this concept.
4519 
4520    For a square global matrix we define each processor's diagonal portion
4521    to be its local rows and the corresponding columns (a square submatrix);
4522    each processor's off-diagonal portion encompasses the remainder of the
4523    local matrix (a rectangular submatrix).
4524 
4525    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4526 
4527    When calling this routine with a single process communicator, a matrix of
4528    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4529    type of communicator, use the construction mechanism:
4530      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4531 
4532    By default, this format uses inodes (identical nodes) when possible.
4533    We search for consecutive rows with the same nonzero structure, thereby
4534    reusing matrix information to achieve increased efficiency.
4535 
4536    Options Database Keys:
4537 +  -mat_no_inode  - Do not use inodes
4538 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4539 -  -mat_aij_oneindex - Internally use indexing starting at 1
4540         rather than 0.  Note that when calling MatSetValues(),
4541         the user still MUST index entries starting at 0!
4542 
4543 
4544    Example usage:
4545 
4546    Consider the following 8x8 matrix with 34 non-zero values, that is
4547    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4548    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4549    as follows:
4550 
4551 .vb
4552             1  2  0  |  0  3  0  |  0  4
4553     Proc0   0  5  6  |  7  0  0  |  8  0
4554             9  0 10  | 11  0  0  | 12  0
4555     -------------------------------------
4556            13  0 14  | 15 16 17  |  0  0
4557     Proc1   0 18  0  | 19 20 21  |  0  0
4558             0  0  0  | 22 23  0  | 24  0
4559     -------------------------------------
4560     Proc2  25 26 27  |  0  0 28  | 29  0
4561            30  0  0  | 31 32 33  |  0 34
4562 .ve
4563 
4564    This can be represented as a collection of submatrices as:
4565 
4566 .vb
4567       A B C
4568       D E F
4569       G H I
4570 .ve
4571 
4572    Where the submatrices A,B,C are owned by proc0, D,E,F are
4573    owned by proc1, G,H,I are owned by proc2.
4574 
4575    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4576    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4577    The 'M','N' parameters are 8,8, and have the same values on all procs.
4578 
4579    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4580    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4581    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4582    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4583    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4584    matrix, ans [DF] as another SeqAIJ matrix.
4585 
4586    When d_nz, o_nz parameters are specified, d_nz storage elements are
4587    allocated for every row of the local diagonal submatrix, and o_nz
4588    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4589    One way to choose d_nz and o_nz is to use the max nonzerors per local
4590    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4591    In this case, the values of d_nz,o_nz are:
4592 .vb
4593      proc0 : dnz = 2, o_nz = 2
4594      proc1 : dnz = 3, o_nz = 2
4595      proc2 : dnz = 1, o_nz = 4
4596 .ve
4597    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4598    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4599    for proc3. i.e we are using 12+15+10=37 storage locations to store
4600    34 values.
4601 
4602    When d_nnz, o_nnz parameters are specified, the storage is specified
4603    for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4604    In the above case the values for d_nnz,o_nnz are:
4605 .vb
4606      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4607      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4608      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4609 .ve
4610    Here the space allocated is sum of all the above values i.e 34, and
4611    hence pre-allocation is perfect.
4612 
4613    Level: intermediate
4614 
4615 .keywords: matrix, aij, compressed row, sparse, parallel
4616 
4617 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4618           MPIAIJ, MatCreateMPIAIJWithArrays()
4619 @*/
4620 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4621 {
4622   PetscErrorCode ierr;
4623   PetscMPIInt    size;
4624 
4625   PetscFunctionBegin;
4626   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4627   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4629   if (size > 1) {
4630     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4631     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4632   } else {
4633     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4634     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4635   }
4636   PetscFunctionReturn(0);
4637 }
4638 
4639 #undef __FUNCT__
4640 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4641 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4642 {
4643   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4644 
4645   PetscFunctionBegin;
4646   *Ad     = a->A;
4647   *Ao     = a->B;
4648   *colmap = a->garray;
4649   PetscFunctionReturn(0);
4650 }
4651 
4652 #undef __FUNCT__
4653 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4654 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4655 {
4656   PetscErrorCode ierr;
4657   PetscInt       i;
4658   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4659 
4660   PetscFunctionBegin;
4661   if (coloring->ctype == IS_COLORING_GLOBAL) {
4662     ISColoringValue *allcolors,*colors;
4663     ISColoring      ocoloring;
4664 
4665     /* set coloring for diagonal portion */
4666     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4667 
4668     /* set coloring for off-diagonal portion */
4669     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4670     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4671     for (i=0; i<a->B->cmap->n; i++) {
4672       colors[i] = allcolors[a->garray[i]];
4673     }
4674     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4675     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4676     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4677     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4678   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4679     ISColoringValue *colors;
4680     PetscInt        *larray;
4681     ISColoring      ocoloring;
4682 
4683     /* set coloring for diagonal portion */
4684     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4685     for (i=0; i<a->A->cmap->n; i++) {
4686       larray[i] = i + A->cmap->rstart;
4687     }
4688     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4689     ierr = PetscMalloc((a->A->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4690     for (i=0; i<a->A->cmap->n; i++) {
4691       colors[i] = coloring->colors[larray[i]];
4692     }
4693     ierr = PetscFree(larray);CHKERRQ(ierr);
4694     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4695     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4696     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4697 
4698     /* set coloring for off-diagonal portion */
4699     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(PetscInt),&larray);CHKERRQ(ierr);
4700     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4701     ierr = PetscMalloc((a->B->cmap->n+1)*sizeof(ISColoringValue),&colors);CHKERRQ(ierr);
4702     for (i=0; i<a->B->cmap->n; i++) {
4703       colors[i] = coloring->colors[larray[i]];
4704     }
4705     ierr = PetscFree(larray);CHKERRQ(ierr);
4706     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4707     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4708     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4709   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4710   PetscFunctionReturn(0);
4711 }
4712 
4713 #undef __FUNCT__
4714 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4715 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4716 {
4717   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4718   PetscErrorCode ierr;
4719 
4720   PetscFunctionBegin;
4721   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4722   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4723   PetscFunctionReturn(0);
4724 }
4725 
4726 #undef __FUNCT__
4727 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4728 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4729 {
4730   PetscErrorCode ierr;
4731   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4732   PetscInt       *indx;
4733 
4734   PetscFunctionBegin;
4735   /* This routine will ONLY return MPIAIJ type matrix */
4736   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4737   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4738   if (n == PETSC_DECIDE) {
4739     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4740   }
4741   /* Check sum(n) = N */
4742   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4743   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4744 
4745   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4746   rstart -= m;
4747 
4748   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4749   for (i=0; i<m; i++) {
4750     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4751     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4752     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4753   }
4754 
4755   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4756   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4757   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4758   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4759   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4760   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4761   PetscFunctionReturn(0);
4762 }
4763 
4764 #undef __FUNCT__
4765 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4766 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4767 {
4768   PetscErrorCode ierr;
4769   PetscInt       m,N,i,rstart,nnz,Ii;
4770   PetscInt       *indx;
4771   PetscScalar    *values;
4772 
4773   PetscFunctionBegin;
4774   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4775   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4776   for (i=0; i<m; i++) {
4777     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4778     Ii   = i + rstart;
4779     ierr = MatSetValues_MPIAIJ(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4780     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4781   }
4782   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4783   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4784   PetscFunctionReturn(0);
4785 }
4786 
4787 #undef __FUNCT__
4788 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4789 /*@
4790       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4791                  matrices from each processor
4792 
4793     Collective on MPI_Comm
4794 
4795    Input Parameters:
4796 +    comm - the communicators the parallel matrix will live on
4797 .    inmat - the input sequential matrices
4798 .    n - number of local columns (or PETSC_DECIDE)
4799 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4800 
4801    Output Parameter:
4802 .    outmat - the parallel matrix generated
4803 
4804     Level: advanced
4805 
4806    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4807 
4808 @*/
4809 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4810 {
4811   PetscErrorCode ierr;
4812 
4813   PetscFunctionBegin;
4814   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4815   if (scall == MAT_INITIAL_MATRIX) {
4816     ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4817   }
4818   ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4819   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4820   PetscFunctionReturn(0);
4821 }
4822 
4823 #undef __FUNCT__
4824 #define __FUNCT__ "MatFileSplit"
4825 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4826 {
4827   PetscErrorCode    ierr;
4828   PetscMPIInt       rank;
4829   PetscInt          m,N,i,rstart,nnz;
4830   size_t            len;
4831   const PetscInt    *indx;
4832   PetscViewer       out;
4833   char              *name;
4834   Mat               B;
4835   const PetscScalar *values;
4836 
4837   PetscFunctionBegin;
4838   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4839   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4840   /* Should this be the type of the diagonal block of A? */
4841   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4842   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4843   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4844   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4845   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4846   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4847   for (i=0; i<m; i++) {
4848     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4849     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4850     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4851   }
4852   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4853   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4854 
4855   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4856   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4857   ierr = PetscMalloc((len+5)*sizeof(char),&name);CHKERRQ(ierr);
4858   sprintf(name,"%s.%d",outfile,rank);
4859   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4860   ierr = PetscFree(name);CHKERRQ(ierr);
4861   ierr = MatView(B,out);CHKERRQ(ierr);
4862   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4863   ierr = MatDestroy(&B);CHKERRQ(ierr);
4864   PetscFunctionReturn(0);
4865 }
4866 
4867 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4868 #undef __FUNCT__
4869 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4870 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4871 {
4872   PetscErrorCode      ierr;
4873   Mat_Merge_SeqsToMPI *merge;
4874   PetscContainer      container;
4875 
4876   PetscFunctionBegin;
4877   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4878   if (container) {
4879     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4880     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4881     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4882     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4883     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4884     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4885     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4886     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4887     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4888     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4889     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4890     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4891     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4892     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4893     ierr = PetscFree(merge);CHKERRQ(ierr);
4894     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4895   }
4896   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4897   PetscFunctionReturn(0);
4898 }
4899 
4900 #include <../src/mat/utils/freespace.h>
4901 #include <petscbt.h>
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4905 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4906 {
4907   PetscErrorCode      ierr;
4908   MPI_Comm            comm;
4909   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4910   PetscMPIInt         size,rank,taga,*len_s;
4911   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4912   PetscInt            proc,m;
4913   PetscInt            **buf_ri,**buf_rj;
4914   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4915   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4916   MPI_Request         *s_waits,*r_waits;
4917   MPI_Status          *status;
4918   MatScalar           *aa=a->a;
4919   MatScalar           **abuf_r,*ba_i;
4920   Mat_Merge_SeqsToMPI *merge;
4921   PetscContainer      container;
4922 
4923   PetscFunctionBegin;
4924   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4925   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4926 
4927   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4928   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4929 
4930   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4931   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4932 
4933   bi     = merge->bi;
4934   bj     = merge->bj;
4935   buf_ri = merge->buf_ri;
4936   buf_rj = merge->buf_rj;
4937 
4938   ierr   = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
4939   owners = merge->rowmap->range;
4940   len_s  = merge->len_s;
4941 
4942   /* send and recv matrix values */
4943   /*-----------------------------*/
4944   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4945   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4946 
4947   ierr = PetscMalloc((merge->nsend+1)*sizeof(MPI_Request),&s_waits);CHKERRQ(ierr);
4948   for (proc=0,k=0; proc<size; proc++) {
4949     if (!len_s[proc]) continue;
4950     i    = owners[proc];
4951     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4952     k++;
4953   }
4954 
4955   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4956   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4957   ierr = PetscFree(status);CHKERRQ(ierr);
4958 
4959   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4960   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4961 
4962   /* insert mat values of mpimat */
4963   /*----------------------------*/
4964   ierr = PetscMalloc(N*sizeof(PetscScalar),&ba_i);CHKERRQ(ierr);
4965   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
4966 
4967   for (k=0; k<merge->nrecv; k++) {
4968     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4969     nrows       = *(buf_ri_k[k]);
4970     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4971     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
4972   }
4973 
4974   /* set values of ba */
4975   m = merge->rowmap->n;
4976   for (i=0; i<m; i++) {
4977     arow = owners[rank] + i;
4978     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4979     bnzi = bi[i+1] - bi[i];
4980     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4981 
4982     /* add local non-zero vals of this proc's seqmat into ba */
4983     anzi   = ai[arow+1] - ai[arow];
4984     aj     = a->j + ai[arow];
4985     aa     = a->a + ai[arow];
4986     nextaj = 0;
4987     for (j=0; nextaj<anzi; j++) {
4988       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4989         ba_i[j] += aa[nextaj++];
4990       }
4991     }
4992 
4993     /* add received vals into ba */
4994     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4995       /* i-th row */
4996       if (i == *nextrow[k]) {
4997         anzi   = *(nextai[k]+1) - *nextai[k];
4998         aj     = buf_rj[k] + *(nextai[k]);
4999         aa     = abuf_r[k] + *(nextai[k]);
5000         nextaj = 0;
5001         for (j=0; nextaj<anzi; j++) {
5002           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
5003             ba_i[j] += aa[nextaj++];
5004           }
5005         }
5006         nextrow[k]++; nextai[k]++;
5007       }
5008     }
5009     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
5010   }
5011   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5012   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5013 
5014   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
5015   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
5016   ierr = PetscFree(ba_i);CHKERRQ(ierr);
5017   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5018   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
5019   PetscFunctionReturn(0);
5020 }
5021 
5022 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
5023 
5024 #undef __FUNCT__
5025 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
5026 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
5027 {
5028   PetscErrorCode      ierr;
5029   Mat                 B_mpi;
5030   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
5031   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
5032   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
5033   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
5034   PetscInt            len,proc,*dnz,*onz,bs,cbs;
5035   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
5036   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
5037   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
5038   MPI_Status          *status;
5039   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
5040   PetscBT             lnkbt;
5041   Mat_Merge_SeqsToMPI *merge;
5042   PetscContainer      container;
5043 
5044   PetscFunctionBegin;
5045   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5046 
5047   /* make sure it is a PETSc comm */
5048   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
5049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5050   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5051 
5052   ierr = PetscNew(Mat_Merge_SeqsToMPI,&merge);CHKERRQ(ierr);
5053   ierr = PetscMalloc(size*sizeof(MPI_Status),&status);CHKERRQ(ierr);
5054 
5055   /* determine row ownership */
5056   /*---------------------------------------------------------*/
5057   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
5058   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
5059   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
5060   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
5061   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
5062   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&len_si);CHKERRQ(ierr);
5063   ierr = PetscMalloc(size*sizeof(PetscMPIInt),&merge->len_s);CHKERRQ(ierr);
5064 
5065   m      = merge->rowmap->n;
5066   owners = merge->rowmap->range;
5067 
5068   /* determine the number of messages to send, their lengths */
5069   /*---------------------------------------------------------*/
5070   len_s = merge->len_s;
5071 
5072   len          = 0; /* length of buf_si[] */
5073   merge->nsend = 0;
5074   for (proc=0; proc<size; proc++) {
5075     len_si[proc] = 0;
5076     if (proc == rank) {
5077       len_s[proc] = 0;
5078     } else {
5079       len_si[proc] = owners[proc+1] - owners[proc] + 1;
5080       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
5081     }
5082     if (len_s[proc]) {
5083       merge->nsend++;
5084       nrows = 0;
5085       for (i=owners[proc]; i<owners[proc+1]; i++) {
5086         if (ai[i+1] > ai[i]) nrows++;
5087       }
5088       len_si[proc] = 2*(nrows+1);
5089       len         += len_si[proc];
5090     }
5091   }
5092 
5093   /* determine the number and length of messages to receive for ij-structure */
5094   /*-------------------------------------------------------------------------*/
5095   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
5096   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
5097 
5098   /* post the Irecv of j-structure */
5099   /*-------------------------------*/
5100   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
5101   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
5102 
5103   /* post the Isend of j-structure */
5104   /*--------------------------------*/
5105   ierr = PetscMalloc2(merge->nsend,MPI_Request,&si_waits,merge->nsend,MPI_Request,&sj_waits);CHKERRQ(ierr);
5106 
5107   for (proc=0, k=0; proc<size; proc++) {
5108     if (!len_s[proc]) continue;
5109     i    = owners[proc];
5110     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
5111     k++;
5112   }
5113 
5114   /* receives and sends of j-structure are complete */
5115   /*------------------------------------------------*/
5116   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
5117   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
5118 
5119   /* send and recv i-structure */
5120   /*---------------------------*/
5121   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
5122   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
5123 
5124   ierr   = PetscMalloc((len+1)*sizeof(PetscInt),&buf_s);CHKERRQ(ierr);
5125   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
5126   for (proc=0,k=0; proc<size; proc++) {
5127     if (!len_s[proc]) continue;
5128     /* form outgoing message for i-structure:
5129          buf_si[0]:                 nrows to be sent
5130                [1:nrows]:           row index (global)
5131                [nrows+1:2*nrows+1]: i-structure index
5132     */
5133     /*-------------------------------------------*/
5134     nrows       = len_si[proc]/2 - 1;
5135     buf_si_i    = buf_si + nrows+1;
5136     buf_si[0]   = nrows;
5137     buf_si_i[0] = 0;
5138     nrows       = 0;
5139     for (i=owners[proc]; i<owners[proc+1]; i++) {
5140       anzi = ai[i+1] - ai[i];
5141       if (anzi) {
5142         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
5143         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
5144         nrows++;
5145       }
5146     }
5147     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
5148     k++;
5149     buf_si += len_si[proc];
5150   }
5151 
5152   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
5153   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
5154 
5155   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
5156   for (i=0; i<merge->nrecv; i++) {
5157     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
5158   }
5159 
5160   ierr = PetscFree(len_si);CHKERRQ(ierr);
5161   ierr = PetscFree(len_ri);CHKERRQ(ierr);
5162   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
5163   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
5164   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
5165   ierr = PetscFree(buf_s);CHKERRQ(ierr);
5166   ierr = PetscFree(status);CHKERRQ(ierr);
5167 
5168   /* compute a local seq matrix in each processor */
5169   /*----------------------------------------------*/
5170   /* allocate bi array and free space for accumulating nonzero column info */
5171   ierr  = PetscMalloc((m+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
5172   bi[0] = 0;
5173 
5174   /* create and initialize a linked list */
5175   nlnk = N+1;
5176   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5177 
5178   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
5179   len  = ai[owners[rank+1]] - ai[owners[rank]];
5180   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
5181 
5182   current_space = free_space;
5183 
5184   /* determine symbolic info for each local row */
5185   ierr = PetscMalloc3(merge->nrecv,PetscInt*,&buf_ri_k,merge->nrecv,PetscInt*,&nextrow,merge->nrecv,PetscInt*,&nextai);CHKERRQ(ierr);
5186 
5187   for (k=0; k<merge->nrecv; k++) {
5188     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
5189     nrows       = *buf_ri_k[k];
5190     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
5191     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure  */
5192   }
5193 
5194   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
5195   len  = 0;
5196   for (i=0; i<m; i++) {
5197     bnzi = 0;
5198     /* add local non-zero cols of this proc's seqmat into lnk */
5199     arow  = owners[rank] + i;
5200     anzi  = ai[arow+1] - ai[arow];
5201     aj    = a->j + ai[arow];
5202     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5203     bnzi += nlnk;
5204     /* add received col data into lnk */
5205     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
5206       if (i == *nextrow[k]) { /* i-th row */
5207         anzi  = *(nextai[k]+1) - *nextai[k];
5208         aj    = buf_rj[k] + *nextai[k];
5209         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
5210         bnzi += nlnk;
5211         nextrow[k]++; nextai[k]++;
5212       }
5213     }
5214     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
5215 
5216     /* if free space is not available, make more free space */
5217     if (current_space->local_remaining<bnzi) {
5218       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
5219       nspacedouble++;
5220     }
5221     /* copy data into free space, then initialize lnk */
5222     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
5223     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
5224 
5225     current_space->array           += bnzi;
5226     current_space->local_used      += bnzi;
5227     current_space->local_remaining -= bnzi;
5228 
5229     bi[i+1] = bi[i] + bnzi;
5230   }
5231 
5232   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
5233 
5234   ierr = PetscMalloc((bi[m]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
5235   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
5236   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
5237 
5238   /* create symbolic parallel matrix B_mpi */
5239   /*---------------------------------------*/
5240   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
5241   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
5242   if (n==PETSC_DECIDE) {
5243     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
5244   } else {
5245     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5246   }
5247   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
5248   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
5249   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
5250   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
5251   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
5252 
5253   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
5254   B_mpi->assembled    = PETSC_FALSE;
5255   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
5256   merge->bi           = bi;
5257   merge->bj           = bj;
5258   merge->buf_ri       = buf_ri;
5259   merge->buf_rj       = buf_rj;
5260   merge->coi          = NULL;
5261   merge->coj          = NULL;
5262   merge->owners_co    = NULL;
5263 
5264   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
5265 
5266   /* attach the supporting struct to B_mpi for reuse */
5267   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5268   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5269   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5270   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
5271   *mpimat = B_mpi;
5272 
5273   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5274   PetscFunctionReturn(0);
5275 }
5276 
5277 #undef __FUNCT__
5278 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
5279 /*@C
5280       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
5281                  matrices from each processor
5282 
5283     Collective on MPI_Comm
5284 
5285    Input Parameters:
5286 +    comm - the communicators the parallel matrix will live on
5287 .    seqmat - the input sequential matrices
5288 .    m - number of local rows (or PETSC_DECIDE)
5289 .    n - number of local columns (or PETSC_DECIDE)
5290 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5291 
5292    Output Parameter:
5293 .    mpimat - the parallel matrix generated
5294 
5295     Level: advanced
5296 
5297    Notes:
5298      The dimensions of the sequential matrix in each processor MUST be the same.
5299      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
5300      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5301 @*/
5302 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5303 {
5304   PetscErrorCode ierr;
5305   PetscMPIInt    size;
5306 
5307   PetscFunctionBegin;
5308   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5309   if (size == 1) {
5310     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5311     if (scall == MAT_INITIAL_MATRIX) {
5312       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5313     } else {
5314       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5315     }
5316     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5317     PetscFunctionReturn(0);
5318   }
5319   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5320   if (scall == MAT_INITIAL_MATRIX) {
5321     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5322   }
5323   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5324   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5325   PetscFunctionReturn(0);
5326 }
5327 
5328 #undef __FUNCT__
5329 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5330 /*@
5331      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5332           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5333           with MatGetSize()
5334 
5335     Not Collective
5336 
5337    Input Parameters:
5338 +    A - the matrix
5339 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5340 
5341    Output Parameter:
5342 .    A_loc - the local sequential matrix generated
5343 
5344     Level: developer
5345 
5346 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5347 
5348 @*/
5349 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5350 {
5351   PetscErrorCode ierr;
5352   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
5353   Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
5354   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray;
5355   MatScalar      *aa=a->a,*ba=b->a,*cam;
5356   PetscScalar    *ca;
5357   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5358   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
5359   PetscBool      match;
5360 
5361   PetscFunctionBegin;
5362   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5363   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5364   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5365   if (scall == MAT_INITIAL_MATRIX) {
5366     ierr  = PetscMalloc((1+am)*sizeof(PetscInt),&ci);CHKERRQ(ierr);
5367     ci[0] = 0;
5368     for (i=0; i<am; i++) {
5369       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5370     }
5371     ierr = PetscMalloc((1+ci[am])*sizeof(PetscInt),&cj);CHKERRQ(ierr);
5372     ierr = PetscMalloc((1+ci[am])*sizeof(PetscScalar),&ca);CHKERRQ(ierr);
5373     k    = 0;
5374     for (i=0; i<am; i++) {
5375       ncols_o = bi[i+1] - bi[i];
5376       ncols_d = ai[i+1] - ai[i];
5377       /* off-diagonal portion of A */
5378       for (jo=0; jo<ncols_o; jo++) {
5379         col = cmap[*bj];
5380         if (col >= cstart) break;
5381         cj[k]   = col; bj++;
5382         ca[k++] = *ba++;
5383       }
5384       /* diagonal portion of A */
5385       for (j=0; j<ncols_d; j++) {
5386         cj[k]   = cstart + *aj++;
5387         ca[k++] = *aa++;
5388       }
5389       /* off-diagonal portion of A */
5390       for (j=jo; j<ncols_o; j++) {
5391         cj[k]   = cmap[*bj++];
5392         ca[k++] = *ba++;
5393       }
5394     }
5395     /* put together the new matrix */
5396     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5397     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5398     /* Since these are PETSc arrays, change flags to free them as necessary. */
5399     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5400     mat->free_a  = PETSC_TRUE;
5401     mat->free_ij = PETSC_TRUE;
5402     mat->nonew   = 0;
5403   } else if (scall == MAT_REUSE_MATRIX) {
5404     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5405     ci = mat->i; cj = mat->j; cam = mat->a;
5406     for (i=0; i<am; i++) {
5407       /* off-diagonal portion of A */
5408       ncols_o = bi[i+1] - bi[i];
5409       for (jo=0; jo<ncols_o; jo++) {
5410         col = cmap[*bj];
5411         if (col >= cstart) break;
5412         *cam++ = *ba++; bj++;
5413       }
5414       /* diagonal portion of A */
5415       ncols_d = ai[i+1] - ai[i];
5416       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5417       /* off-diagonal portion of A */
5418       for (j=jo; j<ncols_o; j++) {
5419         *cam++ = *ba++; bj++;
5420       }
5421     }
5422   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5423   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5424   PetscFunctionReturn(0);
5425 }
5426 
5427 #undef __FUNCT__
5428 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5429 /*@C
5430      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5431 
5432     Not Collective
5433 
5434    Input Parameters:
5435 +    A - the matrix
5436 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5437 -    row, col - index sets of rows and columns to extract (or NULL)
5438 
5439    Output Parameter:
5440 .    A_loc - the local sequential matrix generated
5441 
5442     Level: developer
5443 
5444 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5445 
5446 @*/
5447 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5448 {
5449   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5450   PetscErrorCode ierr;
5451   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5452   IS             isrowa,iscola;
5453   Mat            *aloc;
5454   PetscBool      match;
5455 
5456   PetscFunctionBegin;
5457   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5458   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5459   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5460   if (!row) {
5461     start = A->rmap->rstart; end = A->rmap->rend;
5462     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5463   } else {
5464     isrowa = *row;
5465   }
5466   if (!col) {
5467     start = A->cmap->rstart;
5468     cmap  = a->garray;
5469     nzA   = a->A->cmap->n;
5470     nzB   = a->B->cmap->n;
5471     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5472     ncols = 0;
5473     for (i=0; i<nzB; i++) {
5474       if (cmap[i] < start) idx[ncols++] = cmap[i];
5475       else break;
5476     }
5477     imark = i;
5478     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5479     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5480     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5481   } else {
5482     iscola = *col;
5483   }
5484   if (scall != MAT_INITIAL_MATRIX) {
5485     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5486     aloc[0] = *A_loc;
5487   }
5488   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5489   *A_loc = aloc[0];
5490   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5491   if (!row) {
5492     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5493   }
5494   if (!col) {
5495     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5496   }
5497   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5498   PetscFunctionReturn(0);
5499 }
5500 
5501 #undef __FUNCT__
5502 #define __FUNCT__ "MatGetBrowsOfAcols"
5503 /*@C
5504     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5505 
5506     Collective on Mat
5507 
5508    Input Parameters:
5509 +    A,B - the matrices in mpiaij format
5510 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5511 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5512 
5513    Output Parameter:
5514 +    rowb, colb - index sets of rows and columns of B to extract
5515 -    B_seq - the sequential matrix generated
5516 
5517     Level: developer
5518 
5519 @*/
5520 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5521 {
5522   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5523   PetscErrorCode ierr;
5524   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5525   IS             isrowb,iscolb;
5526   Mat            *bseq=NULL;
5527 
5528   PetscFunctionBegin;
5529   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5530     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5531   }
5532   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5533 
5534   if (scall == MAT_INITIAL_MATRIX) {
5535     start = A->cmap->rstart;
5536     cmap  = a->garray;
5537     nzA   = a->A->cmap->n;
5538     nzB   = a->B->cmap->n;
5539     ierr  = PetscMalloc((nzA+nzB)*sizeof(PetscInt), &idx);CHKERRQ(ierr);
5540     ncols = 0;
5541     for (i=0; i<nzB; i++) {  /* row < local row index */
5542       if (cmap[i] < start) idx[ncols++] = cmap[i];
5543       else break;
5544     }
5545     imark = i;
5546     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5547     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5548     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5549     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5550   } else {
5551     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5552     isrowb  = *rowb; iscolb = *colb;
5553     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5554     bseq[0] = *B_seq;
5555   }
5556   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5557   *B_seq = bseq[0];
5558   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5559   if (!rowb) {
5560     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5561   } else {
5562     *rowb = isrowb;
5563   }
5564   if (!colb) {
5565     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5566   } else {
5567     *colb = iscolb;
5568   }
5569   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5570   PetscFunctionReturn(0);
5571 }
5572 
5573 #undef __FUNCT__
5574 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5575 /*
5576     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5577     of the OFF-DIAGONAL portion of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 
5585    Output Parameter:
5586 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5587 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5588 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5589 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5590 
5591     Level: developer
5592 
5593 */
5594 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5595 {
5596   VecScatter_MPI_General *gen_to,*gen_from;
5597   PetscErrorCode         ierr;
5598   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5599   Mat_SeqAIJ             *b_oth;
5600   VecScatter             ctx =a->Mvctx;
5601   MPI_Comm               comm;
5602   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5603   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5604   PetscScalar            *rvalues,*svalues;
5605   MatScalar              *b_otha,*bufa,*bufA;
5606   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5607   MPI_Request            *rwaits = NULL,*swaits = NULL;
5608   MPI_Status             *sstatus,rstatus;
5609   PetscMPIInt            jj;
5610   PetscInt               *cols,sbs,rbs;
5611   PetscScalar            *vals;
5612 
5613   PetscFunctionBegin;
5614   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5615   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5616     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5617   }
5618   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5619   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5620 
5621   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5622   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5623   rvalues  = gen_from->values; /* holds the length of receiving row */
5624   svalues  = gen_to->values;   /* holds the length of sending row */
5625   nrecvs   = gen_from->n;
5626   nsends   = gen_to->n;
5627 
5628   ierr    = PetscMalloc2(nrecvs,MPI_Request,&rwaits,nsends,MPI_Request,&swaits);CHKERRQ(ierr);
5629   srow    = gen_to->indices;    /* local row index to be sent */
5630   sstarts = gen_to->starts;
5631   sprocs  = gen_to->procs;
5632   sstatus = gen_to->sstatus;
5633   sbs     = gen_to->bs;
5634   rstarts = gen_from->starts;
5635   rprocs  = gen_from->procs;
5636   rbs     = gen_from->bs;
5637 
5638   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5639   if (scall == MAT_INITIAL_MATRIX) {
5640     /* i-array */
5641     /*---------*/
5642     /*  post receives */
5643     for (i=0; i<nrecvs; i++) {
5644       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5645       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5646       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5647     }
5648 
5649     /* pack the outgoing message */
5650     ierr = PetscMalloc2(nsends+1,PetscInt,&sstartsj,nrecvs+1,PetscInt,&rstartsj);CHKERRQ(ierr);
5651 
5652     sstartsj[0] = 0;
5653     rstartsj[0] = 0;
5654     len         = 0; /* total length of j or a array to be sent */
5655     k           = 0;
5656     for (i=0; i<nsends; i++) {
5657       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5658       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5659       for (j=0; j<nrows; j++) {
5660         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5661         for (l=0; l<sbs; l++) {
5662           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5663 
5664           rowlen[j*sbs+l] = ncols;
5665 
5666           len += ncols;
5667           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5668         }
5669         k++;
5670       }
5671       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5672 
5673       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5674     }
5675     /* recvs and sends of i-array are completed */
5676     i = nrecvs;
5677     while (i--) {
5678       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5679     }
5680     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5681 
5682     /* allocate buffers for sending j and a arrays */
5683     ierr = PetscMalloc((len+1)*sizeof(PetscInt),&bufj);CHKERRQ(ierr);
5684     ierr = PetscMalloc((len+1)*sizeof(PetscScalar),&bufa);CHKERRQ(ierr);
5685 
5686     /* create i-array of B_oth */
5687     ierr = PetscMalloc((aBn+2)*sizeof(PetscInt),&b_othi);CHKERRQ(ierr);
5688 
5689     b_othi[0] = 0;
5690     len       = 0; /* total length of j or a array to be received */
5691     k         = 0;
5692     for (i=0; i<nrecvs; i++) {
5693       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5694       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5695       for (j=0; j<nrows; j++) {
5696         b_othi[k+1] = b_othi[k] + rowlen[j];
5697         len        += rowlen[j]; k++;
5698       }
5699       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5700     }
5701 
5702     /* allocate space for j and a arrrays of B_oth */
5703     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(PetscInt),&b_othj);CHKERRQ(ierr);
5704     ierr = PetscMalloc((b_othi[aBn]+1)*sizeof(MatScalar),&b_otha);CHKERRQ(ierr);
5705 
5706     /* j-array */
5707     /*---------*/
5708     /*  post receives of j-array */
5709     for (i=0; i<nrecvs; i++) {
5710       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5711       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5712     }
5713 
5714     /* pack the outgoing message j-array */
5715     k = 0;
5716     for (i=0; i<nsends; i++) {
5717       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5718       bufJ  = bufj+sstartsj[i];
5719       for (j=0; j<nrows; j++) {
5720         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5721         for (ll=0; ll<sbs; ll++) {
5722           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5723           for (l=0; l<ncols; l++) {
5724             *bufJ++ = cols[l];
5725           }
5726           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5727         }
5728       }
5729       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5730     }
5731 
5732     /* recvs and sends of j-array are completed */
5733     i = nrecvs;
5734     while (i--) {
5735       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5736     }
5737     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5738   } else if (scall == MAT_REUSE_MATRIX) {
5739     sstartsj = *startsj_s;
5740     rstartsj = *startsj_r;
5741     bufa     = *bufa_ptr;
5742     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5743     b_otha   = b_oth->a;
5744   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5745 
5746   /* a-array */
5747   /*---------*/
5748   /*  post receives of a-array */
5749   for (i=0; i<nrecvs; i++) {
5750     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5751     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5752   }
5753 
5754   /* pack the outgoing message a-array */
5755   k = 0;
5756   for (i=0; i<nsends; i++) {
5757     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5758     bufA  = bufa+sstartsj[i];
5759     for (j=0; j<nrows; j++) {
5760       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5761       for (ll=0; ll<sbs; ll++) {
5762         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5763         for (l=0; l<ncols; l++) {
5764           *bufA++ = vals[l];
5765         }
5766         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5767       }
5768     }
5769     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5770   }
5771   /* recvs and sends of a-array are completed */
5772   i = nrecvs;
5773   while (i--) {
5774     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5775   }
5776   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5777   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5778 
5779   if (scall == MAT_INITIAL_MATRIX) {
5780     /* put together the new matrix */
5781     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5782 
5783     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5784     /* Since these are PETSc arrays, change flags to free them as necessary. */
5785     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5786     b_oth->free_a  = PETSC_TRUE;
5787     b_oth->free_ij = PETSC_TRUE;
5788     b_oth->nonew   = 0;
5789 
5790     ierr = PetscFree(bufj);CHKERRQ(ierr);
5791     if (!startsj_s || !bufa_ptr) {
5792       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5793       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5794     } else {
5795       *startsj_s = sstartsj;
5796       *startsj_r = rstartsj;
5797       *bufa_ptr  = bufa;
5798     }
5799   }
5800   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5801   PetscFunctionReturn(0);
5802 }
5803 
5804 #undef __FUNCT__
5805 #define __FUNCT__ "MatGetCommunicationStructs"
5806 /*@C
5807   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5808 
5809   Not Collective
5810 
5811   Input Parameters:
5812 . A - The matrix in mpiaij format
5813 
5814   Output Parameter:
5815 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5816 . colmap - A map from global column index to local index into lvec
5817 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5818 
5819   Level: developer
5820 
5821 @*/
5822 #if defined(PETSC_USE_CTABLE)
5823 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5824 #else
5825 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5826 #endif
5827 {
5828   Mat_MPIAIJ *a;
5829 
5830   PetscFunctionBegin;
5831   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5832   PetscValidPointer(lvec, 2);
5833   PetscValidPointer(colmap, 3);
5834   PetscValidPointer(multScatter, 4);
5835   a = (Mat_MPIAIJ*) A->data;
5836   if (lvec) *lvec = a->lvec;
5837   if (colmap) *colmap = a->colmap;
5838   if (multScatter) *multScatter = a->Mvctx;
5839   PetscFunctionReturn(0);
5840 }
5841 
5842 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5843 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5844 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5845 
5846 #undef __FUNCT__
5847 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5848 /*
5849     Computes (B'*A')' since computing B*A directly is untenable
5850 
5851                n                       p                          p
5852         (              )       (              )         (                  )
5853       m (      A       )  *  n (       B      )   =   m (         C        )
5854         (              )       (              )         (                  )
5855 
5856 */
5857 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5858 {
5859   PetscErrorCode ierr;
5860   Mat            At,Bt,Ct;
5861 
5862   PetscFunctionBegin;
5863   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5864   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5865   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5866   ierr = MatDestroy(&At);CHKERRQ(ierr);
5867   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5868   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5869   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5870   PetscFunctionReturn(0);
5871 }
5872 
5873 #undef __FUNCT__
5874 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5875 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5876 {
5877   PetscErrorCode ierr;
5878   PetscInt       m=A->rmap->n,n=B->cmap->n;
5879   Mat            Cmat;
5880 
5881   PetscFunctionBegin;
5882   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5883   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5884   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5885   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5886   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5887   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5888   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5889   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5890 
5891   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5892 
5893   *C = Cmat;
5894   PetscFunctionReturn(0);
5895 }
5896 
5897 /* ----------------------------------------------------------------*/
5898 #undef __FUNCT__
5899 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5900 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5901 {
5902   PetscErrorCode ierr;
5903 
5904   PetscFunctionBegin;
5905   if (scall == MAT_INITIAL_MATRIX) {
5906     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5907     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5908     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5909   }
5910   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5911   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5912   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5913   PetscFunctionReturn(0);
5914 }
5915 
5916 #if defined(PETSC_HAVE_MUMPS)
5917 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5918 #endif
5919 #if defined(PETSC_HAVE_PASTIX)
5920 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5921 #endif
5922 #if defined(PETSC_HAVE_SUPERLU_DIST)
5923 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5924 #endif
5925 #if defined(PETSC_HAVE_CLIQUE)
5926 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5927 #endif
5928 
5929 /*MC
5930    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5931 
5932    Options Database Keys:
5933 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5934 
5935   Level: beginner
5936 
5937 .seealso: MatCreateAIJ()
5938 M*/
5939 
5940 #undef __FUNCT__
5941 #define __FUNCT__ "MatCreate_MPIAIJ"
5942 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5943 {
5944   Mat_MPIAIJ     *b;
5945   PetscErrorCode ierr;
5946   PetscMPIInt    size;
5947 
5948   PetscFunctionBegin;
5949   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5950 
5951   ierr          = PetscNewLog(B,Mat_MPIAIJ,&b);CHKERRQ(ierr);
5952   B->data       = (void*)b;
5953   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5954   B->assembled  = PETSC_FALSE;
5955   B->insertmode = NOT_SET_VALUES;
5956   b->size       = size;
5957 
5958   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5959 
5960   /* build cache for off array entries formed */
5961   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5962 
5963   b->donotstash  = PETSC_FALSE;
5964   b->colmap      = 0;
5965   b->garray      = 0;
5966   b->roworiented = PETSC_TRUE;
5967 
5968   /* stuff used for matrix vector multiply */
5969   b->lvec  = NULL;
5970   b->Mvctx = NULL;
5971 
5972   /* stuff for MatGetRow() */
5973   b->rowindices   = 0;
5974   b->rowvalues    = 0;
5975   b->getrowactive = PETSC_FALSE;
5976 
5977   /* flexible pointer used in CUSP/CUSPARSE classes */
5978   b->spptr = NULL;
5979 
5980 #if defined(PETSC_HAVE_MUMPS)
5981   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5982 #endif
5983 #if defined(PETSC_HAVE_PASTIX)
5984   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5985 #endif
5986 #if defined(PETSC_HAVE_SUPERLU_DIST)
5987   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5988 #endif
5989 #if defined(PETSC_HAVE_CLIQUE)
5990   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5991 #endif
5992   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5993   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5994   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5995   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5996   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5997   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5998   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5999   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6000   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6001   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6002   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
6003   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
6004   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
6005   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6006   PetscFunctionReturn(0);
6007 }
6008 
6009 #undef __FUNCT__
6010 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
6011 /*@
6012      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6013          and "off-diagonal" part of the matrix in CSR format.
6014 
6015    Collective on MPI_Comm
6016 
6017    Input Parameters:
6018 +  comm - MPI communicator
6019 .  m - number of local rows (Cannot be PETSC_DECIDE)
6020 .  n - This value should be the same as the local size used in creating the
6021        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
6022        calculated if N is given) For square matrices n is almost always m.
6023 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
6024 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
6025 .   i - row indices for "diagonal" portion of matrix
6026 .   j - column indices
6027 .   a - matrix values
6028 .   oi - row indices for "off-diagonal" portion of matrix
6029 .   oj - column indices
6030 -   oa - matrix values
6031 
6032    Output Parameter:
6033 .   mat - the matrix
6034 
6035    Level: advanced
6036 
6037    Notes:
6038        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6039        must free the arrays once the matrix has been destroyed and not before.
6040 
6041        The i and j indices are 0 based
6042 
6043        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6044 
6045        This sets local rows and cannot be used to set off-processor values.
6046 
6047        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6048        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6049        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6050        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6051        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6052        communication if it is known that only local entries will be set.
6053 
6054 .keywords: matrix, aij, compressed row, sparse, parallel
6055 
6056 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6057           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6058 @*/
6059 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6060 {
6061   PetscErrorCode ierr;
6062   Mat_MPIAIJ     *maij;
6063 
6064   PetscFunctionBegin;
6065   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6066   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6067   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6068   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6069   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6070   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6071   maij = (Mat_MPIAIJ*) (*mat)->data;
6072 
6073   (*mat)->preallocated = PETSC_TRUE;
6074 
6075   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6076   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6077 
6078   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6079   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6080 
6081   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6082   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6083   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6084   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6085 
6086   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6087   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6088   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6089   PetscFunctionReturn(0);
6090 }
6091 
6092 /*
6093     Special version for direct calls from Fortran
6094 */
6095 #include <petsc-private/fortranimpl.h>
6096 
6097 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6098 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6099 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6100 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6101 #endif
6102 
6103 /* Change these macros so can be used in void function */
6104 #undef CHKERRQ
6105 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6106 #undef SETERRQ2
6107 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6108 #undef SETERRQ3
6109 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6110 #undef SETERRQ
6111 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6112 
6113 #undef __FUNCT__
6114 #define __FUNCT__ "matsetvaluesmpiaij_"
6115 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6116 {
6117   Mat            mat  = *mmat;
6118   PetscInt       m    = *mm, n = *mn;
6119   InsertMode     addv = *maddv;
6120   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6121   PetscScalar    value;
6122   PetscErrorCode ierr;
6123 
6124   MatCheckPreallocated(mat,1);
6125   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6126 
6127 #if defined(PETSC_USE_DEBUG)
6128   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6129 #endif
6130   {
6131     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6132     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6133     PetscBool roworiented = aij->roworiented;
6134 
6135     /* Some Variables required in the macro */
6136     Mat        A                 = aij->A;
6137     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
6138     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6139     MatScalar  *aa               = a->a;
6140     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6141     Mat        B                 = aij->B;
6142     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
6143     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6144     MatScalar  *ba               = b->a;
6145 
6146     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6147     PetscInt  nonew = a->nonew;
6148     MatScalar *ap1,*ap2;
6149 
6150     PetscFunctionBegin;
6151     for (i=0; i<m; i++) {
6152       if (im[i] < 0) continue;
6153 #if defined(PETSC_USE_DEBUG)
6154       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6155 #endif
6156       if (im[i] >= rstart && im[i] < rend) {
6157         row      = im[i] - rstart;
6158         lastcol1 = -1;
6159         rp1      = aj + ai[row];
6160         ap1      = aa + ai[row];
6161         rmax1    = aimax[row];
6162         nrow1    = ailen[row];
6163         low1     = 0;
6164         high1    = nrow1;
6165         lastcol2 = -1;
6166         rp2      = bj + bi[row];
6167         ap2      = ba + bi[row];
6168         rmax2    = bimax[row];
6169         nrow2    = bilen[row];
6170         low2     = 0;
6171         high2    = nrow2;
6172 
6173         for (j=0; j<n; j++) {
6174           if (roworiented) value = v[i*n+j];
6175           else value = v[i+j*m];
6176           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
6177           if (in[j] >= cstart && in[j] < cend) {
6178             col = in[j] - cstart;
6179             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
6180           } else if (in[j] < 0) continue;
6181 #if defined(PETSC_USE_DEBUG)
6182           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6183 #endif
6184           else {
6185             if (mat->was_assembled) {
6186               if (!aij->colmap) {
6187                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6188               }
6189 #if defined(PETSC_USE_CTABLE)
6190               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6191               col--;
6192 #else
6193               col = aij->colmap[in[j]] - 1;
6194 #endif
6195               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6196                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6197                 col  =  in[j];
6198                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6199                 B     = aij->B;
6200                 b     = (Mat_SeqAIJ*)B->data;
6201                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6202                 rp2   = bj + bi[row];
6203                 ap2   = ba + bi[row];
6204                 rmax2 = bimax[row];
6205                 nrow2 = bilen[row];
6206                 low2  = 0;
6207                 high2 = nrow2;
6208                 bm    = aij->B->rmap->n;
6209                 ba    = b->a;
6210               }
6211             } else col = in[j];
6212             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
6213           }
6214         }
6215       } else if (!aij->donotstash) {
6216         if (roworiented) {
6217           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6218         } else {
6219           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6220         }
6221       }
6222     }
6223   }
6224   PetscFunctionReturnVoid();
6225 }
6226 
6227