xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d1da0b6977ee87f1a77143268e9e8db764d4e8b3)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
  Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to use inodes when
   enough of them exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
109 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
110 {
111   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
112   PetscErrorCode ierr;
113   PetscInt       i,rstart,nrows,*rows;
114 
115   PetscFunctionBegin;
116   *zrows = NULL;
117   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
118   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
119   for (i=0; i<nrows; i++) rows[i] += rstart;
120   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
126 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
127 {
128   PetscErrorCode ierr;
129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
130   PetscInt       i,n,*garray = aij->garray;
131   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
132   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
133   PetscReal      *work;
134 
135   PetscFunctionBegin;
136   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
137   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
138   if (type == NORM_2) {
139     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
140       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
141     }
142     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
143       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
144     }
145   } else if (type == NORM_1) {
146     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
147       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
148     }
149     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
150       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
151     }
152   } else if (type == NORM_INFINITY) {
153     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
154       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
155     }
156     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
157       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
158     }
159 
160   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
161   if (type == NORM_INFINITY) {
162     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
163   } else {
164     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
165   }
166   ierr = PetscFree(work);CHKERRQ(ierr);
167   if (type == NORM_2) {
168     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
169   }
170   PetscFunctionReturn(0);
171 }
172 
#undef __FUNCT__
#define __FUNCT__ "MatDistribute_MPIAIJ"
/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN

    Input Parameters:
+   comm  - communicator to distribute over
.   gmat  - the sequential matrix; significant only on process 0
.   m     - number of rows this process is to own
-   reuse - MAT_INITIAL_MATRIX to build the parallel matrix from scratch, or
            MAT_REUSE_MATRIX to transfer only new numerical values into *inmat
            (the nonzero pattern must then match the one the matrix was built with)

    Output Parameter:
.   inmat - the distributed matrix
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    /* block sizes are known only on process 0, so broadcast them */
    if (!rank) {
      bses[0] = gmat->rmap->bs;
      bses[1] = gmat->cmap->bs;
    }
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    /* prefix-sum the gathered local row counts into an ownership-range table */
    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      /* ld[i] = number of entries of local row i that lie to the LEFT of the
         diagonal block; saved on the matrix so the MAT_REUSE_MATRIX path
         below can split incoming values between the A and B blocks */
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      /* process 0 inserts its own rows directly from gmat's arrays */
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      /* NOTE(review): the PetscMemzero looks redundant since MPI_Recv fills
         all nz entries -- confirm before removing */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    /* both are called because the actual type (Seq or MPI) depends on the communicator size */
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    /* insert the local rows one at a time; column indices are still global */
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    /* stash the left-of-diagonal counts for later MAT_REUSE_MATRIX calls */
    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else {   /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    /* each incoming row is ordered [left-of-diagonal B | A | right-of-diagonal B];
       ld[] holds the left-of-diagonal count per row (computed at creation) */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      /* right-of-diagonal tail of row i-1 plus left-of-diagonal head of row i */
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      /* trailing right-of-diagonal part of the last row */
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
347 
/*
  Local utility routine that creates a mapping from the global column
number to the local number in the off-diagonal part of the local
storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
a slightly higher hash table cost; without it it is not scalable (each process
has an order-N integer array, but access is fast).
*/
355 #undef __FUNCT__
356 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
357 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
358 {
359   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
360   PetscErrorCode ierr;
361   PetscInt       n = aij->B->cmap->n,i;
362 
363   PetscFunctionBegin;
364   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
365 #if defined(PETSC_USE_CTABLE)
366   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
367   for (i=0; i<n; i++) {
368     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
369   }
370 #else
371   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
372   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
373   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
374 #endif
375   PetscFunctionReturn(0);
376 }
377 
/*
   MatSetValues_SeqAIJ_A_Private(row,col,value,addv) - inserts or adds one
   (row,col,value) entry into the diagonal block A of an MPIAIJ matrix.

   Expects the caller (MatSetValues_MPIAIJ) to have in scope: rp1/ap1 (column
   indices and values of the current row of A), nrow1/rmax1 (used/allocated
   length of that row), low1/high1/lastcol1 (search-window state carried
   across calls so monotonically increasing columns search less), plus
   a,aa,ai,aj,aimax,ailen,am,nonew,ignorezeroentries and scratch t,_i,ii,N.

   Does a bisection-narrowed linear search for col; on a hit it adds or
   overwrites per addv.  On a miss it either silently skips (zero value with
   ignorezeroentries, or nonew == 1), errors (nonew == -1), or reallocates the
   row via MatSeqXAIJReallocateAIJ and shifts later entries up to insert.
*/
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) ap1[_i] += value;   \
          else                    ap1[_i] = value; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp1[ii+1] = rp1[ii]; \
        ap1[ii+1] = ap1[ii]; \
      } \
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      a_noinsert: ; \
      ailen[row] = nrow1; \
}
411 
412 
/*
   MatSetValues_SeqAIJ_B_Private(row,col,value,addv) - inserts or adds one
   (row,col,value) entry into the off-diagonal block B of an MPIAIJ matrix.
   Mirror image of MatSetValues_SeqAIJ_A_Private() above, operating on the
   caller-supplied B-block state: rp2/ap2, nrow2/rmax2, low2/high2/lastcol2,
   b,ba,bi,bj,bimax,bilen,bm, plus the shared nonew, ignorezeroentries and
   scratch variables t,_i,ii,N.
*/
#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) ap2[_i] += value;         \
        else                    ap2[_i] = value;          \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    for (ii=N; ii>=_i; ii--) {                            \
      rp2[ii+1] = rp2[ii];                                \
      ap2[ii+1] = ap2[ii];                                \
    }                                                     \
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }
446 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesRow_MPIAIJ"
/*
   Replaces all stored values of one locally owned row in a single shot.

   Input Parameters:
+  A   - the MPIAIJ matrix (code only works for square matrices, see below)
.  row - the global row number (must be owned by this process)
-  v   - the new values, ordered by increasing global column number:
         entries left of the diagonal block, then the diagonal block,
         then entries right of the diagonal block

   Notes: only overwrites existing stored values; the nonzero pattern is not
   changed.  Assumes column indices within each row are sorted.  Because the
   row ownership range is used where the column range is needed, this only
   works for square matrices.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  /* garray maps B's local column numbers to global columns: count how many
     off-diagonal entries of this row precede the diagonal block */
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
474 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIAIJ"
/*
   Inserts or adds a logically dense m-by-n block of values into the matrix.

   Values in locally owned rows go directly into the diagonal block A or the
   off-diagonal block B via the MatSetValues_SeqAIJ_{A,B}_Private macros
   above; values in rows owned by other processes are placed in the stash and
   communicated during assembly.  Negative row or column indices are ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A                 = aij->A;
  Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa               = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B                 = aij->B;
  Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba               = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row state consumed by the macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v) {
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
        } else value = 0.0;
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column lies in the off-diagonal block B */
          if (mat->was_assembled) {
            /* after assembly B uses compacted (local) column numbering:
               translate the global column through colmap (built lazily) */
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              /* a brand-new off-diagonal nonzero: revert B to global column
                 numbering so it can be inserted; communication structures
                 are rebuilt at the next assembly */
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
585 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIAIJ"
/*
   Retrieves a logically dense m-by-n block of values into v (row-major).
   Only values in locally owned rows may be requested; an off-process row is
   an error.  Entries not stored in the matrix are returned as 0.0; negative
   row or column indices are skipped.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          /* column lies in the diagonal block A */
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block B: translate the global column to B's local
             numbering through colmap (built lazily on first use) */
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* col < 0 (not in colmap) or a stale entry means the value is not stored */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
627 
628 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
629 
630 #undef __FUNCT__
631 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
632 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
633 {
634   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
635   PetscErrorCode ierr;
636   PetscInt       nstash,reallocs;
637   InsertMode     addv;
638 
639   PetscFunctionBegin;
640   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
641 
642   /* make sure all processors are either in INSERTMODE or ADDMODE */
643   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
644   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
645   mat->insertmode = addv; /* in case this processor had no cache */
646 
647   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
648   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
649   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
/*
   Completes the assembly begun by MatAssemblyBegin_MPIAIJ(): receives and
   inserts all stashed off-process values, assembles the diagonal (A) and
   off-diagonal (B) blocks, and on the first final assembly builds the
   communication structures for MatMult via MatSetUpMultiply_MPIAIJ().
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* drain the stash: each message carries (row,col,val) triples sent to this process */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is false iff some rank disassembled */
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build garray and the scatter used by MatMult */
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  /* drop caches that are invalidated by new values */
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  /* used by MatAXPY() */
  a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
  a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
  PetscFunctionReturn(0);
}
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
727 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
728 {
729   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
730   PetscErrorCode ierr;
731 
732   PetscFunctionBegin;
733   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
734   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIAIJ"
/*
   Zeros the globally-numbered rows listed in rows[] (each listed row may be
   owned by any process); optionally puts diag on the zeroed diagonal entries.
   If both x and b are given, b is adjusted so the zeroed equations read
   diag*x[row] = b[row].  Row indices are routed to their owners with a
   PetscSF reduction rather than hand-rolled MPI.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
  PetscInt      *owners = A->rmap->range;  /* rows owned by rank p are [owners[p],owners[p+1]) */
  PetscInt       n      = A->rmap->n;      /* number of locally owned rows */
  PetscMPIInt    size   = mat->size;
  PetscSF        sf;
  PetscInt      *lrows;                    /* per-owned-row flags, later compressed to local row numbers */
  PetscSFNode   *rrows;                    /* (owner rank, local index) for each requested row */
  PetscInt       lastidx = -1, r, p = 0, len = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;   /* -1 == not requested; SF reduce below overwrites requested rows */
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    PetscBool      found = PETSC_FALSE;
    /* Trick for efficient searching for sorted rows: restart the ownership
       scan only when the requested index sequence decreases */
    if (lastidx > idx) p = 0;
    lastidx = idx;
    for (; p < size; ++p) {
      if (idx >= owners[p] && idx < owners[p+1]) {
        rrows[r].rank  = p;
        rrows[r].index = rows[r] - owners[p];
        found = PETSC_TRUE;
        break;
      }
    }
    if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed: the logical-OR reduction marks every
     owned row that some process requested (its lrows entry becomes >= 0) */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
  if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
    /* square diagonal block: the sequential MatZeroRows can place diag itself */
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-square diagonal block: zero first, then insert diagonal entries with
       global MatSetValues (may create new nonzero locations) */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
811 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
/*
   Zeros the listed global rows and the matching columns; diag is placed on
   the diagonal of the zeroed rows (delegated to the sequential
   MatZeroRowsColumns for the diagonal block).  If x and b are given, b is
   adjusted for the eliminated columns.  Row indices are delivered to their
   owning processes with hand-rolled nonblocking MPI sends/receives.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = l->size,imdex,n,rank = l->rank,tag = ((PetscObject)A)->tag,lastidx = -1;
  PetscInt          i,*owners = A->rmap->range;
  PetscInt          *sizes,j,idx,nsends;
  PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt          *rvalues,count,base,slen,*source;
  PetscInt          *lens,*lrows,*values,m;
  MPI_Comm          comm;
  MPI_Request       *send_waits,*recv_waits;
  MPI_Status        recv_status,*send_status;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;
#if defined(PETSC_DEBUG)
  PetscBool found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /*  first count number of contributors to each processor */
  /* sizes[2*p]   = number of rows this rank will send to rank p
     sizes[2*p+1] = 1 iff at least one row goes to rank p (message flag) */
  ierr = PetscCalloc1(2*size,&sizes);CHKERRQ(ierr);
  ierr = PetscMalloc1(N+1,&owner);CHKERRQ(ierr); /* see note*/
  j    = 0;
  for (i=0; i<N; i++) {
    /* restart the ownership scan only when the index sequence decreases
       (cheap for sorted input) */
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        sizes[2*j]++;
        sizes[2*j+1] = 1;
        owner[i]      = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) nsends += sizes[2*i+1];

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,sizes,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc1((nrecvs+1)*(nmax+1),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nrecvs+1),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
      1) starts[i] gives the starting index in svalues for stuff going to
         the ith processor
  */
  ierr = PetscMalloc1((N+1),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc1((nsends+1),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc1((size+1),&starts);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  for (i=0; i<N; i++) svalues[starts[owner[i]]++] = rows[i];

  /* the packing loop above advanced starts[]; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) starts[i] = starts[i-1] + sizes[2*i-2];
  count = 0;
  for (i=0; i<size; i++) {
    if (sizes[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],sizes[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];  /* first global row owned by this rank */

  /*  wait on receives */
  ierr  = PetscMalloc2(nrecvs,&lens,nrecvs,&source);CHKERRQ(ierr);
  count = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);

    source[imdex] = recv_status.MPI_SOURCE;
    lens[imdex]   = n;
    slen         += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr  = PetscMalloc1((slen+1),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) lrows[count++] = values[j] - base;
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree2(lens,source);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(sizes);CHKERRQ(ierr);
  /* lrows are the local rows to be zeroed, slen is the number of local rows */

  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,slen,lrows,diag,x,b);CHKERRQ(ierr);

  /* handle off diagonal part of matrix */
  ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  /* mark the zeroed rows with 1 in a global vector and scatter so each rank
     sees the marks for the ghost columns of its off-diagonal block */
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<slen; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to fix b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);

  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<slen; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }

  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;   /* maps compressed row -> actual local row */
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc1(nsends,&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMult_MPIAIJ"
1009 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1012   PetscErrorCode ierr;
1013   PetscInt       nt;
1014 
1015   PetscFunctionBegin;
1016   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1017   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
1018   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1019   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1020   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1021   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
1027 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
1028 {
1029   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1030   PetscErrorCode ierr;
1031 
1032   PetscFunctionBegin;
1033   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
1034   PetscFunctionReturn(0);
1035 }
1036 
1037 #undef __FUNCT__
1038 #define __FUNCT__ "MatMultAdd_MPIAIJ"
1039 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1040 {
1041   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1042   PetscErrorCode ierr;
1043 
1044   PetscFunctionBegin;
1045   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1047   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1048   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1049   PetscFunctionReturn(0);
1050 }
1051 
1052 #undef __FUNCT__
1053 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1054 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1055 {
1056   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1057   PetscErrorCode ierr;
1058   PetscBool      merged;
1059 
1060   PetscFunctionBegin;
1061   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1062   /* do nondiagonal part */
1063   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1064   if (!merged) {
1065     /* send it on its way */
1066     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1067     /* do local part */
1068     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1069     /* receive remote parts: note this assumes the values are not actually */
1070     /* added in yy until the next line, */
1071     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1072   } else {
1073     /* do local part */
1074     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1075     /* send it on its way */
1076     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077     /* values actually were received in the Begin() but we need to call this nop */
1078     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1079   }
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1085 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1086 {
1087   MPI_Comm       comm;
1088   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1089   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1090   IS             Me,Notme;
1091   PetscErrorCode ierr;
1092   PetscInt       M,N,first,last,*notme,i;
1093   PetscMPIInt    size;
1094 
1095   PetscFunctionBegin;
1096   /* Easy test: symmetric diagonal block */
1097   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1098   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1099   if (!*f) PetscFunctionReturn(0);
1100   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1101   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1102   if (size == 1) PetscFunctionReturn(0);
1103 
1104   /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. */
1105   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1106   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1107   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1108   for (i=0; i<first; i++) notme[i] = i;
1109   for (i=last; i<M; i++) notme[i-last+first] = i;
1110   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1111   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1112   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1113   Aoff = Aoffs[0];
1114   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1115   Boff = Boffs[0];
1116   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1117   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1118   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1119   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1120   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1121   ierr = PetscFree(notme);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 #undef __FUNCT__
1126 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1127 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1128 {
1129   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1130   PetscErrorCode ierr;
1131 
1132   PetscFunctionBegin;
1133   /* do nondiagonal part */
1134   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1135   /* send it on its way */
1136   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1137   /* do local part */
1138   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1139   /* receive remote parts */
1140   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 /*
1145   This only works correctly for square matrices where the subblock A->A is the
1146    diagonal block
1147 */
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1150 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1151 {
1152   PetscErrorCode ierr;
1153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1154 
1155   PetscFunctionBegin;
1156   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1157   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1158   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1159   PetscFunctionReturn(0);
1160 }
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatScale_MPIAIJ"
1164 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1165 {
1166   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1167   PetscErrorCode ierr;
1168 
1169   PetscFunctionBegin;
1170   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1171   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ"
/* Frees everything the MPIAIJ matrix owns — the stash, the two sequential
   sub-blocks, the column map, ghost vector/scatter, cached work arrays —
   and unregisters the composed methods installed at creation time. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and remove the composed method slots */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1213 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIAIJ_Binary"
/*
   Writes the parallel matrix to a binary viewer in PETSc's standard format:
   a 4-entry header (classid, M, N, total nz), then per-row nonzero counts,
   then global column indices, then values.  Rank 0 performs all file I/O;
   other ranks ship their data to rank 0 under viewer flow control.  Within
   each local row, entries are emitted in global column order: off-diagonal
   entries left of the diagonal block, the diagonal block, then the remaining
   off-diagonal entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;   /* local nonzeros over both sub-blocks */
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* on the root this must be as large as the largest nz on any process; the reduction below provides that */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    /* off-diagonal entries whose global column precedes the diagonal block */
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    /* diagonal-block entries, shifted to global column numbering */
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    /* remaining off-diagonal entries, to the right of the diagonal block */
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): the message below reports nz, but the offending value is rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values (same traversal order as the indices above) */
  ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      /* NOTE(review): the message below reports nz, but the offending value is rnz */
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  /* record the block size in the companion .info file, if one exists */
  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  PetscFunctionReturn(0);
}
1347 
1348 #include <petscdraw.h>
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1351 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1352 {
1353   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1354   PetscErrorCode    ierr;
1355   PetscMPIInt       rank = aij->rank,size = aij->size;
1356   PetscBool         isdraw,iascii,isbinary;
1357   PetscViewer       sviewer;
1358   PetscViewerFormat format;
1359 
1360   PetscFunctionBegin;
1361   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1362   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1363   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1364   if (iascii) {
1365     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1366     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1367       MatInfo   info;
1368       PetscBool inodes;
1369 
1370       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1371       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1372       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1374       if (!inodes) {
1375         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1376                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1377       } else {
1378         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1379                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1380       }
1381       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1382       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1383       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1384       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1385       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1386       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1387       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1388       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1389       PetscFunctionReturn(0);
1390     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1391       PetscInt inodecount,inodelimit,*inodes;
1392       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1393       if (inodes) {
1394         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1395       } else {
1396         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1397       }
1398       PetscFunctionReturn(0);
1399     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1400       PetscFunctionReturn(0);
1401     }
1402   } else if (isbinary) {
1403     if (size == 1) {
1404       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1405       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1406     } else {
1407       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1408     }
1409     PetscFunctionReturn(0);
1410   } else if (isdraw) {
1411     PetscDraw draw;
1412     PetscBool isnull;
1413     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1414     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1415   }
1416 
1417   if (size == 1) {
1418     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1419     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1420   } else {
1421     /* assemble the entire matrix onto first processor. */
1422     Mat        A;
1423     Mat_SeqAIJ *Aloc;
1424     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1425     MatScalar  *a;
1426 
1427     if (mat->rmap->N > 1024) {
1428       PetscBool flg = PETSC_FALSE;
1429 
1430       ierr = PetscOptionsGetBool(((PetscObject) mat)->prefix, "-mat_ascii_output_large", &flg,NULL);CHKERRQ(ierr);
1431       if (!flg) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_OUTOFRANGE,"ASCII matrix output not allowed for matrices with more than 1024 rows, use binary format instead.\nYou can override this restriction using -mat_ascii_output_large.");
1432     }
1433 
1434     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1435     if (!rank) {
1436       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1437     } else {
1438       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1439     }
1440     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1441     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1442     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1443     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1444     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1445 
1446     /* copy over the A part */
1447     Aloc = (Mat_SeqAIJ*)aij->A->data;
1448     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1449     row  = mat->rmap->rstart;
1450     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1451     for (i=0; i<m; i++) {
1452       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1453       row++;
1454       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1455     }
1456     aj = Aloc->j;
1457     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1458 
1459     /* copy over the B part */
1460     Aloc = (Mat_SeqAIJ*)aij->B->data;
1461     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1462     row  = mat->rmap->rstart;
1463     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1464     ct   = cols;
1465     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1466     for (i=0; i<m; i++) {
1467       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1468       row++;
1469       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1470     }
1471     ierr = PetscFree(ct);CHKERRQ(ierr);
1472     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1473     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1474     /*
1475        Everyone has to call to draw the matrix since the graphics waits are
1476        synchronized across all processors that share the PetscDraw object
1477     */
1478     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1479     if (!rank) {
1480       ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1481       /* Set the type name to MATMPIAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqAIJ_ASCII()*/
1482       PetscStrcpy(((PetscObject)((Mat_MPIAIJ*)(A->data))->A)->type_name,MATMPIAIJ);
1483       ierr = MatView(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1484     }
1485     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1486     ierr = MatDestroy(&A);CHKERRQ(ierr);
1487   }
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatView_MPIAIJ"
1493 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1494 {
1495   PetscErrorCode ierr;
1496   PetscBool      iascii,isdraw,issocket,isbinary;
1497 
1498   PetscFunctionBegin;
1499   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1500   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1501   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1502   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1503   if (iascii || isdraw || isbinary || issocket) {
1504     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1505   }
1506   PetscFunctionReturn(0);
1507 }
1508 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPIAIJ"
/*
   MatSOR_MPIAIJ - SOR/relaxation for MPIAIJ matrices.

   Only the SOR_LOCAL_* sweep variants, SOR_APPLY_UPPER and SOR_EISENSTAT are
   supported; a fully parallel SOR is rejected with PETSC_ERR_SUP at the end.
   The local variants repeatedly (a) gather the off-process entries of xx into
   mat->lvec via mat->Mvctx, (b) form the corrected right-hand side
   bb1 = bb - B*lvec, and (c) apply the sequential SOR kernel of the diagonal
   block mat->A to the corrected system.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;   /* corrected rhs; only duplicated when actually needed */
  PetscBool      hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* apply-upper uses no off-process coupling; forward to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed when more than one outer iteration runs, when the initial
     guess is nonzero (~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when
     the SOR_ZERO_INITIAL_GUESS bit is NOT set), or for the Eisenstat variant */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first iteration needs no off-process correction */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      /* gather off-process entries of xx into mat->lvec */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
    /* backward local sweep with zero initial guess produces the first half-step in xx */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    /* lazily build and cache the diagonal of the full matrix */
    if (!mat->diag) {
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    /* bb1 = D*xx, using a dedicated diagonal-block multiply if available */
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
    } else {
      ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
    }
    /* bb1 = bb + ((omega-2)/omega)*D*xx   (Eisenstat's rhs modification) */
    ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);

    /* add the off-process coupling: bb1 += B*lvec */
    ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
    ierr = VecDestroy(&xx1);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");

  /* VecDestroy() is a no-op when bb1 was never duplicated (still 0) */
  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1608 
#undef __FUNCT__
#define __FUNCT__ "MatPermute_MPIAIJ"
/*
   MatPermute_MPIAIJ - Builds B = P*A*Q for permutations given by rowp and colp.

   Strategy: the permutation indices (rwant/cwant) say which global row/column
   each local row/column WANTS; PetscSF reductions invert those maps so that
   each process learns where its rows (rdest) and columns (cdest) actually go.
   A second SF broadcast translates the off-diagonal column map (garray) into
   permuted global columns (gcdest).  Preallocation counts are computed on the
   source side and broadcast to the destination rows (tdnnz/tonnz) before the
   values are moved with MatSetValues().
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
{
  Mat            aA,aB,Aperm;
  const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
  PetscScalar    *aa,*ba;
  PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
  PetscSF        rowsf,sf;
  IS             parcolp = NULL;
  PetscBool      done;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
  ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);

  /* Invert row permutation to find out where my rows should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
  for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;  /* my global row indices */
  ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);

  /* Invert column permutation to find out where my columns should go */
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;  /* my global column indices */
  ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
  ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
  ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);

  /* Find out where my gcols should go */
  ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
  ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
  ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
  ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
  /* broadcast the permuted destinations of the columns referenced by my off-diagonal part */
  ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

  /* Count diagonal/off-diagonal nonzeros of each permuted row (still stored by source row) */
  ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt row = rdest[i],rowner;
    ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
    for (j=ai[i]; j<ai[i+1]; j++) {
      PetscInt cowner,col = cdest[aj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j=bi[i]; j<bi[i+1]; j++) {
      PetscInt cowner,col = gcdest[bj[j]];
      ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that own the destination rows */
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
  ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);

  ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt rowlen;
    rowlen = ai[i+1] - ai[i];
    for (j=0; j<rowlen; j++) acols[j] = cdest[aj[ai[i]+j]];
    ierr   = MatSetValues(Aperm,1,&rdest[i],rowlen,acols,aa+ai[i],INSERT_VALUES);CHKERRQ(ierr);
    rowlen = bi[i+1] - bi[i];
    for (j=0; j<rowlen; j++) bcols[j] = gcdest[bj[bi[i]+j]];
    ierr = MatSetValues(Aperm,1,&rdest[i],rowlen,bcols,ba+bi[i],INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
  ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
  ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
  ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
  ierr = PetscFree(gcdest);CHKERRQ(ierr);
  /* NOTE(review): parcolp is never set non-NULL in this routine, so this destroy
     never fires — presumably a remnant of handling a parallel colp; confirm */
  if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
  *B = Aperm;
  PetscFunctionReturn(0);
}
1710 
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1713 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1714 {
1715   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1716   Mat            A    = mat->A,B = mat->B;
1717   PetscErrorCode ierr;
1718   PetscReal      isend[5],irecv[5];
1719 
1720   PetscFunctionBegin;
1721   info->block_size = 1.0;
1722   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1723 
1724   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1725   isend[3] = info->memory;  isend[4] = info->mallocs;
1726 
1727   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1728 
1729   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1730   isend[3] += info->memory;  isend[4] += info->mallocs;
1731   if (flag == MAT_LOCAL) {
1732     info->nz_used      = isend[0];
1733     info->nz_allocated = isend[1];
1734     info->nz_unneeded  = isend[2];
1735     info->memory       = isend[3];
1736     info->mallocs      = isend[4];
1737   } else if (flag == MAT_GLOBAL_MAX) {
1738     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1739 
1740     info->nz_used      = irecv[0];
1741     info->nz_allocated = irecv[1];
1742     info->nz_unneeded  = irecv[2];
1743     info->memory       = irecv[3];
1744     info->mallocs      = irecv[4];
1745   } else if (flag == MAT_GLOBAL_SUM) {
1746     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1747 
1748     info->nz_used      = irecv[0];
1749     info->nz_allocated = irecv[1];
1750     info->nz_unneeded  = irecv[2];
1751     info->memory       = irecv[3];
1752     info->mallocs      = irecv[4];
1753   }
1754   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1755   info->fill_ratio_needed = 0;
1756   info->factor_mallocs    = 0;
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetOption_MPIAIJ"
1762 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1763 {
1764   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   switch (op) {
1769   case MAT_NEW_NONZERO_LOCATIONS:
1770   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1771   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1772   case MAT_KEEP_NONZERO_PATTERN:
1773   case MAT_NEW_NONZERO_LOCATION_ERR:
1774   case MAT_USE_INODES:
1775   case MAT_IGNORE_ZERO_ENTRIES:
1776     MatCheckPreallocated(A,1);
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1779     break;
1780   case MAT_ROW_ORIENTED:
1781     a->roworiented = flg;
1782 
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1785     break;
1786   case MAT_NEW_DIAGONALS:
1787     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1788     break;
1789   case MAT_IGNORE_OFF_PROC_ENTRIES:
1790     a->donotstash = flg;
1791     break;
1792   case MAT_SPD:
1793     A->spd_set = PETSC_TRUE;
1794     A->spd     = flg;
1795     if (flg) {
1796       A->symmetric                  = PETSC_TRUE;
1797       A->structurally_symmetric     = PETSC_TRUE;
1798       A->symmetric_set              = PETSC_TRUE;
1799       A->structurally_symmetric_set = PETSC_TRUE;
1800     }
1801     break;
1802   case MAT_SYMMETRIC:
1803     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1804     break;
1805   case MAT_STRUCTURALLY_SYMMETRIC:
1806     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1807     break;
1808   case MAT_HERMITIAN:
1809     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1810     break;
1811   case MAT_SYMMETRY_ETERNAL:
1812     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1813     break;
1814   default:
1815     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1816   }
1817   PetscFunctionReturn(0);
1818 }
1819 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIAIJ"
/*
   MatGetRow_MPIAIJ - Returns one (locally owned) global row of the matrix.

   The row is assembled by merging the diagonal-block (A) entries with the
   off-diagonal-block (B) entries so that the output is sorted by increasing
   global column index:  B entries whose global column (via garray) is below
   cstart come first, then all A entries (shifted by cstart), then the
   remaining B entries.  Scratch arrays mat->rowvalues/rowindices are sized
   once for the longest local row and reused; MatRestoreRow must be called
   before the next MatGetRow (enforced via getrowactive).
*/
PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
  PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   max = 1,tmp;
    for (i=0; i<matin->rmap->n; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
  }

  /* only rows owned by this process can be returned */
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
  lrow = row - rstart;

  /* pass NULL to the block getrow for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;   /* local off-diagonal column -> global column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;   /* number of B entries that precede the A block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]];
          }
        } else {
          /* values were not requested; find imark here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1902 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1903 {
1904   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1905 
1906   PetscFunctionBegin;
1907   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1908   aij->getrowactive = PETSC_FALSE;
1909   PetscFunctionReturn(0);
1910 }
1911 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIAIJ"
/*
   MatNorm_MPIAIJ - Computes the Frobenius, 1- (max column sum) or
   infinity- (max row sum) norm of an MPIAIJ matrix.

   On a single process the work is delegated to the sequential diagonal
   block.  Otherwise each process accumulates over its diagonal (A) and
   off-diagonal (B) blocks and the results are combined with MPI_Allreduce.
   The 2-norm is not supported.
*/
PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,cstart = mat->cmap->rstart;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (aij->size == 1) {
    ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, reduce, then take the square root */
      v = amat->a;
      for (i=0; i<amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v = bmat->a;
      for (i=0; i<bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = PetscSqrtReal(*norm);
    } else if (type == NORM_1) { /* max column norm */
      /* per-column |a_ij| sums accumulated into a global-length array, then
         summed across processes and maximized; O(N) storage per process */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray = aij->garray;
      ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
      ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
      *norm = 0.0;
      v     = amat->a; jj = amat->j;
      for (j=0; j<amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;   /* A columns are local; shift by cstart */
      }
      v = bmat->a; jj = bmat->j;
      for (j=0; j<bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;     /* B columns map through garray */
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      /* each row is wholly local, so only a max reduction is needed */
      PetscReal ntemp = 0.0;
      for (j=0; j<aij->A->rmap->n; j++) {
        v   = amat->a + amat->i[j];
        sum = 0.0;
        for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        v = bmat->a + bmat->i[j];
        for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v); v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscFunctionReturn(0);
}
1977 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIAIJ"
/*
   MatTranspose_MPIAIJ - Forms the transpose of an MPIAIJ matrix.

   Rows of A are inserted as COLUMNS of B via MatSetValues(B,ncol,cols,1,&row,...),
   which is what effects the transposition.  For MAT_INITIAL_MATRIX (or in-place
   reuse) the preallocation of B is computed first: d_nnz by counting local
   column occurrences in the diagonal block, o_nnz by reducing the off-diagonal
   column counts (g_nnz) onto their owning processes with a PetscSF.

   Note: the diagonal block's column indices aj are temporarily shifted to
   global numbering for MatSetValues() and shifted back afterwards, so A is
   unchanged on return.
*/
PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
  PetscInt       cstart = A->cmap->rstart,ncol;
  Mat            B;
  MatScalar      *array;

  PetscFunctionBegin;
  /* in-place transpose only makes sense for square matrices */
  if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");

  ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
  ai = Aloc->i; aj = Aloc->j;
  bi = Bloc->i; bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt             *d_nnz,*g_nnz,*o_nnz;
    PetscSFNode          *oloc;
    PETSC_UNUSED PetscSF sf;

    ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
    /* compute d_nnz for preallocation */
    ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) {
      d_nnz[aj[i]]++;
      aj[i] += cstart; /* global col index to be used by MatSetValues() */
    }
    /* compute local off-diagonal contributions */
    ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
    ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
    ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
    ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
    ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);

    /* B has the reversed sizes/block sizes of A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->cmap->bs,A->rmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
    ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
  } else {
    /* reusing an existing matrix; any new nonzero location is an error */
    B    = *matout;
    ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
  }

  /* copy over the A part */
  array = Aloc->a;
  row   = A->rmap->rstart;
  for (i=0; i<ma; i++) {
    ncol = ai[i+1]-ai[i];
    /* insert row i of A as column `row` of B (the transposition step) */
    ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; aj += ncol;
  }
  aj = Aloc->j;
  for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */

  /* copy over the B part */
  ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
  array = Bloc->a;
  row   = A->rmap->rstart;
  /* translate compressed off-diagonal columns to global indices */
  for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i=0; i<mb; i++) {
    ncol = bi[i+1]-bi[i];
    ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
    row++;
    array += ncol; cols_tmp += ncol;
  }
  ierr = PetscFree(cols);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
    *matout = B;
  } else {
    /* in-place: replace A's contents with B's and discard the shell of B */
    ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2067 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIAIJ"
/*
   MatDiagonalScale_MPIAIJ - Computes mat = diag(ll)*mat*diag(rr).

   Either vector may be NULL to skip that side.  The scatter that gathers the
   off-process entries of rr into aij->lvec is started first so the left
   scaling of B and the full scaling of A overlap the communication; the
   right scaling of B happens after the scatter completes.  This ordering is
   deliberate - do not rearrange.
*/
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            a    = aij->A,b = aij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling of the off-diagonal block needs only local values of ll */
    ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2100 
2101 #undef __FUNCT__
2102 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2103 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2104 {
2105   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2106   PetscErrorCode ierr;
2107 
2108   PetscFunctionBegin;
2109   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatEqual_MPIAIJ"
2115 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2116 {
2117   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2118   Mat            a,b,c,d;
2119   PetscBool      flg;
2120   PetscErrorCode ierr;
2121 
2122   PetscFunctionBegin;
2123   a = matA->A; b = matA->B;
2124   c = matB->A; d = matB->B;
2125 
2126   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2127   if (flg) {
2128     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2129   }
2130   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatCopy_MPIAIJ"
2136 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2137 {
2138   PetscErrorCode ierr;
2139   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2140   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2141 
2142   PetscFunctionBegin;
2143   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2144   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2145     /* because of the column compression in the off-processor part of the matrix a->B,
2146        the number of columns in a->B and b->B may be different, hence we cannot call
2147        the MatCopy() directly on the two parts. If need be, we can provide a more
2148        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2149        then copying the submatrices */
2150     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2151   } else {
2152     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2153     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2154   }
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatSetUp_MPIAIJ"
2160 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2161 {
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2166   PetscFunctionReturn(0);
2167 }
2168 
#undef __FUNCT__
#define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
/* For each row i, nnz[i] is set to the size of the union of the column sets of
   X and Y, i.e. the number of nonzeros row i of Y+alpha*X will have.  Columns
   of the two (off-diagonal, compressed) blocks are compared in the global
   numbering via the supplied maps xltog/yltog; the merge assumes each row's
   column indices are stored in increasing order.
   NOTE(review): m is taken from Y->rmap->N; this looks safe here because Y is
   the sequential off-diagonal block (global == local size) -- confirm before
   reusing this routine in another context. */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
{
  PetscInt       i,m=Y->rmap->N;
  Mat_SeqAIJ     *x  = (Mat_SeqAIJ*)X->data;
  Mat_SeqAIJ     *y  = (Mat_SeqAIJ*)Y->data;
  const PetscInt *xi = x->i,*yi = y->i;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i=0; i<m; i++) {
    PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
    const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
    nnz[i] = 0;
    /* two-pointer merge of the sorted column lists, counting duplicates once */
    for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
      for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k<nzy; k++) nnz[i]++; /* columns present only in Y after X is exhausted */
  }
  PetscFunctionReturn(0);
}
2194 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPIAIJ"
/* Computes Y = a*X + Y.  Three strategies depending on the structural
   relationship of X and Y:
   - SAME_NONZERO_PATTERN:   direct BLAS axpy on the value arrays of the
                             diagonal and off-diagonal blocks;
   - SUBSET_NONZERO_PATTERN: X's entries are scattered into Y through a
                             cached x-to-y index map (y->xtoy);
   - otherwise:              a new matrix with the union pattern is built,
                             filled, and swapped into Y via MatHeaderReplace(). */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  PetscInt       i;
  Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqAIJ     *x,*y;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical sparsity: the aij value arrays line up entry-for-entry,
       so a plain BLAS axpy on each block suffices */
    x    = (Mat_SeqAIJ*)xx->A->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    y    = (Mat_SeqAIJ*)yy->A->data;
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqAIJ*)xx->B->data;
    y    = (Mat_SeqAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
  } else if (str == SUBSET_NONZERO_PATTERN) {
    ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);

    x = (Mat_SeqAIJ*)xx->B->data;
    y = (Mat_SeqAIJ*)yy->B->data;
    /* discard a cached map that was built against a different X */
    if (y->xtoy && y->XtoY != xx->B) {
      ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
      ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
    }
    if (!y->xtoy) { /* get xtoy */
      ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
      y->XtoY = xx->B;
      /* reference xx->B so the cached map's owner stays alive */
      ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
    }
    /* scatter-add each X entry into its position in Y's value array */
    for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
  } else {
    /* general case: build a new matrix B with the union nonzero pattern,
       compute B = a*X + Y into it, then replace Y's guts with B's */
    Mat      B;
    PetscInt *nnz_d,*nnz_o;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,Y->rmap->bs,Y->cmap->bs);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2251 
2252 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2253 
#undef __FUNCT__
#define __FUNCT__ "MatConjugate_MPIAIJ"
/* Replace every entry of the matrix with its complex conjugate by
   conjugating the diagonal (aij->A) and off-diagonal (aij->B) sequential
   blocks.  With a real scalar type this compiles to a no-op. */
PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatRealPart_MPIAIJ"
2273 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2280   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2286 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2293   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #if defined(PETSC_HAVE_PBGL)
2298 
2299 #include <boost/parallel/mpi/bsp_process_group.hpp>
2300 #include <boost/graph/distributed/ilu_default_graph.hpp>
2301 #include <boost/graph/distributed/ilu_0_block.hpp>
2302 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2303 #include <boost/graph/distributed/petsc/interface.hpp>
2304 #include <boost/multi_array.hpp>
2305 #include <boost/parallel/distributed_property_map->hpp>
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2309 /*
2310   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2311 */
2312 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2313 {
2314   namespace petsc = boost::distributed::petsc;
2315 
2316   namespace graph_dist = boost::graph::distributed;
2317   using boost::graph::distributed::ilu_default::process_group_type;
2318   using boost::graph::ilu_permuted;
2319 
2320   PetscBool      row_identity, col_identity;
2321   PetscContainer c;
2322   PetscInt       m, n, M, N;
2323   PetscErrorCode ierr;
2324 
2325   PetscFunctionBegin;
2326   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2327   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2328   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2329   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2330 
2331   process_group_type pg;
2332   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2333   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2334   lgraph_type& level_graph = *lgraph_p;
2335   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2336 
2337   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2338   ilu_permuted(level_graph);
2339 
2340   /* put together the new matrix */
2341   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2342   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2343   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2344   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2345   ierr = MatSetBlockSizes(fact,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
2346   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2347   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2349 
2350   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);
2351   ierr = PetscContainerSetPointer(c, lgraph_p);
2352   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);
2353   ierr = PetscContainerDestroy(&c);
2354   PetscFunctionReturn(0);
2355 }
2356 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
/* Intentionally a no-op: in the PBGL path the numeric work (ilu_permuted)
   appears to be done already inside MatILUFactorSymbolic_MPIAIJ() above,
   so there is nothing left to compute here. */
PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatSolve_MPIAIJ"
2367 /*
2368   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2369 */
2370 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2371 {
2372   namespace graph_dist = boost::graph::distributed;
2373 
2374   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2375   lgraph_type    *lgraph_p;
2376   PetscContainer c;
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2381   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2382   ierr = VecCopy(b, x);CHKERRQ(ierr);
2383 
2384   PetscScalar *array_x;
2385   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2386   PetscInt sx;
2387   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2388 
2389   PetscScalar *array_b;
2390   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2391   PetscInt sb;
2392   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2393 
2394   lgraph_type& level_graph = *lgraph_p;
2395   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2396 
2397   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2398   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2399   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2400 
2401   typedef boost::iterator_property_map<array_ref_type::iterator,
2402                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2403   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2404   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2405 
2406   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2407   PetscFunctionReturn(0);
2408 }
2409 #endif
2410 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MatRedundant"
/* Destructor installed on matrices produced by MatGetRedundantMatrix_MPIAIJ():
   frees the Mat_Redundant support structure attached to the matrix, then
   chains to the matrix's original destroy routine saved in redund->Destroy. */
PetscErrorCode MatDestroy_MatRedundant(Mat A)
{
  PetscErrorCode ierr;
  Mat_Redundant  *redund;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  /* the redundant matrix is SeqAIJ on one process, MPIAIJ otherwise; the
     Mat_Redundant pointer lives in the corresponding data struct */
  if (size == 1) {
    Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
    redund = a->redundant;
  } else {
    Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
    redund = a->redundant;
  }
  if (redund){
    if (redund->matseq) { /* via MatGetSubMatrices()  */
      ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
      ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
      ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
    } else {
      /* buffers allocated by the interlaced communication path */
      ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
      ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
      for (i=0; i<redund->nrecvs; i++) {
        ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
        ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
      }
      ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
    }

    if (redund->psubcomm) {
      ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
    }
    /* run the matrix's original destroy routine before freeing the struct
       that holds the pointer to it */
    ierr = redund->Destroy(A);CHKERRQ(ierr);
    ierr = PetscFree(redund);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2457 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2458 {
2459   PetscMPIInt    rank,size;
2460   MPI_Comm       comm;
2461   PetscErrorCode ierr;
2462   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2463   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2464   PetscInt       *rowrange = mat->rmap->range;
2465   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2466   Mat            A = aij->A,B=aij->B,C=*matredundant;
2467   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2468   PetscScalar    *sbuf_a;
2469   PetscInt       nzlocal=a->nz+b->nz;
2470   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2471   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2472   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2473   MatScalar      *aworkA,*aworkB;
2474   PetscScalar    *vals;
2475   PetscMPIInt    tag1,tag2,tag3,imdex;
2476   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2477   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2478   MPI_Status     recv_status,*send_status;
2479   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2480   PetscInt       **rbuf_j=NULL;
2481   PetscScalar    **rbuf_a=NULL;
2482   Mat_Redundant  *redund =NULL;
2483 
2484   PetscFunctionBegin;
2485   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2486   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2487   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2488   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2489   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2490 
2491   if (reuse == MAT_REUSE_MATRIX) {
2492     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2493     if (subsize == 1) {
2494       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2495       redund = c->redundant;
2496     } else {
2497       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2498       redund = c->redundant;
2499     }
2500     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2501 
2502     nsends    = redund->nsends;
2503     nrecvs    = redund->nrecvs;
2504     send_rank = redund->send_rank;
2505     recv_rank = redund->recv_rank;
2506     sbuf_nz   = redund->sbuf_nz;
2507     rbuf_nz   = redund->rbuf_nz;
2508     sbuf_j    = redund->sbuf_j;
2509     sbuf_a    = redund->sbuf_a;
2510     rbuf_j    = redund->rbuf_j;
2511     rbuf_a    = redund->rbuf_a;
2512   }
2513 
2514   if (reuse == MAT_INITIAL_MATRIX) {
2515     PetscInt    nleftover,np_subcomm;
2516 
2517     /* get the destination processors' id send_rank, nsends and nrecvs */
2518     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2519 
2520     np_subcomm = size/nsubcomm;
2521     nleftover  = size - nsubcomm*np_subcomm;
2522 
2523     /* block of codes below is specific for INTERLACED */
2524     /* ------------------------------------------------*/
2525     nsends = 0; nrecvs = 0;
2526     for (i=0; i<size; i++) {
2527       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2528         send_rank[nsends++] = i;
2529         recv_rank[nrecvs++] = i;
2530       }
2531     }
2532     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2533       i = size-nleftover-1;
2534       j = 0;
2535       while (j < nsubcomm - nleftover) {
2536         send_rank[nsends++] = i;
2537         i--; j++;
2538       }
2539     }
2540 
2541     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2542       for (i=0; i<nleftover; i++) {
2543         recv_rank[nrecvs++] = size-nleftover+i;
2544       }
2545     }
2546     /*----------------------------------------------*/
2547 
2548     /* allocate sbuf_j, sbuf_a */
2549     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2550     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2551     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2552     /*
2553     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2554     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2555      */
2556   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2557 
2558   /* copy mat's local entries into the buffers */
2559   if (reuse == MAT_INITIAL_MATRIX) {
2560     rownz_max = 0;
2561     rptr      = sbuf_j;
2562     cols      = sbuf_j + rend-rstart + 1;
2563     vals      = sbuf_a;
2564     rptr[0]   = 0;
2565     for (i=0; i<rend-rstart; i++) {
2566       row    = i + rstart;
2567       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2568       ncols  = nzA + nzB;
2569       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2570       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2571       /* load the column indices for this row into cols */
2572       lwrite = 0;
2573       for (l=0; l<nzB; l++) {
2574         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2575           vals[lwrite]   = aworkB[l];
2576           cols[lwrite++] = ctmp;
2577         }
2578       }
2579       for (l=0; l<nzA; l++) {
2580         vals[lwrite]   = aworkA[l];
2581         cols[lwrite++] = cstart + cworkA[l];
2582       }
2583       for (l=0; l<nzB; l++) {
2584         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2585           vals[lwrite]   = aworkB[l];
2586           cols[lwrite++] = ctmp;
2587         }
2588       }
2589       vals     += ncols;
2590       cols     += ncols;
2591       rptr[i+1] = rptr[i] + ncols;
2592       if (rownz_max < ncols) rownz_max = ncols;
2593     }
2594     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart+1],a->nz,b->nz);
2595   } else { /* only copy matrix values into sbuf_a */
2596     rptr    = sbuf_j;
2597     vals    = sbuf_a;
2598     rptr[0] = 0;
2599     for (i=0; i<rend-rstart; i++) {
2600       row    = i + rstart;
2601       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2602       ncols  = nzA + nzB;
2603       cworkB = b->j + b->i[i];
2604       aworkA = a->a + a->i[i];
2605       aworkB = b->a + b->i[i];
2606       lwrite = 0;
2607       for (l=0; l<nzB; l++) {
2608         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2609       }
2610       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2611       for (l=0; l<nzB; l++) {
2612         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2613       }
2614       vals     += ncols;
2615       rptr[i+1] = rptr[i] + ncols;
2616     }
2617   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2618 
2619   /* send nzlocal to others, and recv other's nzlocal */
2620   /*--------------------------------------------------*/
2621   if (reuse == MAT_INITIAL_MATRIX) {
2622     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2623 
2624     s_waits2 = s_waits3 + nsends;
2625     s_waits1 = s_waits2 + nsends;
2626     r_waits1 = s_waits1 + nsends;
2627     r_waits2 = r_waits1 + nrecvs;
2628     r_waits3 = r_waits2 + nrecvs;
2629   } else {
2630     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2631 
2632     r_waits3 = s_waits3 + nsends;
2633   }
2634 
2635   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2636   if (reuse == MAT_INITIAL_MATRIX) {
2637     /* get new tags to keep the communication clean */
2638     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2639     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2640     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2641 
2642     /* post receives of other's nzlocal */
2643     for (i=0; i<nrecvs; i++) {
2644       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2645     }
2646     /* send nzlocal to others */
2647     for (i=0; i<nsends; i++) {
2648       sbuf_nz[i] = nzlocal;
2649       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2650     }
2651     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2652     count = nrecvs;
2653     while (count) {
2654       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2655 
2656       recv_rank[imdex] = recv_status.MPI_SOURCE;
2657       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2658       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2659 
2660       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2661 
2662       rbuf_nz[imdex] += i + 2;
2663 
2664       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2665       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2666       count--;
2667     }
2668     /* wait on sends of nzlocal */
2669     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2670     /* send mat->i,j to others, and recv from other's */
2671     /*------------------------------------------------*/
2672     for (i=0; i<nsends; i++) {
2673       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2674       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2675     }
2676     /* wait on receives of mat->i,j */
2677     /*------------------------------*/
2678     count = nrecvs;
2679     while (count) {
2680       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2681       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2682       count--;
2683     }
2684     /* wait on sends of mat->i,j */
2685     /*---------------------------*/
2686     if (nsends) {
2687       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2688     }
2689   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2690 
2691   /* post receives, send and receive mat->a */
2692   /*----------------------------------------*/
2693   for (imdex=0; imdex<nrecvs; imdex++) {
2694     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2695   }
2696   for (i=0; i<nsends; i++) {
2697     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2698   }
2699   count = nrecvs;
2700   while (count) {
2701     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2702     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2703     count--;
2704   }
2705   if (nsends) {
2706     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2707   }
2708 
2709   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2710 
2711   /* create redundant matrix */
2712   /*-------------------------*/
2713   if (reuse == MAT_INITIAL_MATRIX) {
2714     const PetscInt *range;
2715     PetscInt       rstart_sub,rend_sub,mloc_sub;
2716 
2717     /* compute rownz_max for preallocation */
2718     for (imdex=0; imdex<nrecvs; imdex++) {
2719       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2720       rptr = rbuf_j[imdex];
2721       for (i=0; i<j; i++) {
2722         ncols = rptr[i+1] - rptr[i];
2723         if (rownz_max < ncols) rownz_max = ncols;
2724       }
2725     }
2726 
2727     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2728 
2729     /* get local size of redundant matrix
2730        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not efficient! */
2731     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2732     rstart_sub = range[nsubcomm*subrank];
2733     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2734       rend_sub = range[nsubcomm*(subrank+1)];
2735     } else {
2736       rend_sub = mat->rmap->N;
2737     }
2738     mloc_sub = rend_sub - rstart_sub;
2739 
2740     if (M == N) {
2741       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2742     } else { /* non-square matrix */
2743       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2744     }
2745     ierr = MatSetBlockSizes(C,mat->rmap->bs,mat->cmap->bs);CHKERRQ(ierr);
2746     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2747     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2748     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2749   } else {
2750     C = *matredundant;
2751   }
2752 
2753   /* insert local matrix entries */
2754   rptr = sbuf_j;
2755   cols = sbuf_j + rend-rstart + 1;
2756   vals = sbuf_a;
2757   for (i=0; i<rend-rstart; i++) {
2758     row   = i + rstart;
2759     ncols = rptr[i+1] - rptr[i];
2760     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2761     vals += ncols;
2762     cols += ncols;
2763   }
2764   /* insert received matrix entries */
2765   for (imdex=0; imdex<nrecvs; imdex++) {
2766     rstart = rowrange[recv_rank[imdex]];
2767     rend   = rowrange[recv_rank[imdex]+1];
2768     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2769     rptr   = rbuf_j[imdex];
2770     cols   = rbuf_j[imdex] + rend-rstart + 1;
2771     vals   = rbuf_a[imdex];
2772     for (i=0; i<rend-rstart; i++) {
2773       row   = i + rstart;
2774       ncols = rptr[i+1] - rptr[i];
2775       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2776       vals += ncols;
2777       cols += ncols;
2778     }
2779   }
2780   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2781   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782 
2783   if (reuse == MAT_INITIAL_MATRIX) {
2784     *matredundant = C;
2785 
2786     /* create a supporting struct and attach it to C for reuse */
2787     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2788     if (subsize == 1) {
2789       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2790       c->redundant = redund;
2791     } else {
2792       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2793       c->redundant = redund;
2794     }
2795 
2796     redund->nzlocal   = nzlocal;
2797     redund->nsends    = nsends;
2798     redund->nrecvs    = nrecvs;
2799     redund->send_rank = send_rank;
2800     redund->recv_rank = recv_rank;
2801     redund->sbuf_nz   = sbuf_nz;
2802     redund->rbuf_nz   = rbuf_nz;
2803     redund->sbuf_j    = sbuf_j;
2804     redund->sbuf_a    = sbuf_a;
2805     redund->rbuf_j    = rbuf_j;
2806     redund->rbuf_a    = rbuf_a;
2807     redund->psubcomm  = NULL;
2808 
2809     redund->Destroy = C->ops->destroy;
2810     C->ops->destroy = MatDestroy_MatRedundant;
2811   }
2812   PetscFunctionReturn(0);
2813 }
2814 
#undef __FUNCT__
#define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
/* Create (or refill) a redundant copy of mat on each of nsubcomm
   subcommunicators.  If the caller supplies no subcomm, a PetscSubcomm is
   created (or recovered from the existing redundant matrix on reuse); for
   the INTERLACED layout the specialized routine above is used, otherwise
   the matrix is assembled via MatGetSubMatrices() on each subcomm followed
   by MatCreateMPIAIJConcatenateSeqAIJ(). */
PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
{
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscMPIInt    size,subsize;
  PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
  Mat_Redundant  *redund=NULL;
  PetscSubcomm   psubcomm=NULL;
  MPI_Comm       subcomm_in=subcomm;
  Mat            *matseq;
  IS             isrow,iscol;

  PetscFunctionBegin;
  if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
    if (reuse ==  MAT_INITIAL_MATRIX) {
      /* create psubcomm, then get subcomm */
      ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
      if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must between 1 and %D",size);

      /* the default CONTIGUOUS type may be overridden from the options
         database by PetscSubcommSetFromOptions() */
      ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
      ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
      ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
      subcomm = psubcomm->comm;
    } else { /* retrieve psubcomm and subcomm */
      ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
      ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
      if (subsize == 1) {
        Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
        redund = c->redundant;
      } else {
        Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
        redund = c->redundant;
      }
      psubcomm = redund->psubcomm;
    }
    if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
      ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
      if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_MatRedundant() */
        ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
        if (subsize == 1) {
          Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm;
        } else {
          Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
          c->redundant->psubcomm = psubcomm ;
        }
      }
      PetscFunctionReturn(0);
    }
  }

  /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
  ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a local sequential matrix matseq[0] */
    mloc_sub = PETSC_DECIDE;
    ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
    /* prefix-sum the local sizes to get this process's global row range */
    ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
    rstart = rend - mloc_sub;
    ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
  } else { /* reuse == MAT_REUSE_MATRIX */
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      redund = c->redundant;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      redund = c->redundant;
    }

    isrow  = redund->isrow;
    iscol  = redund->iscol;
    matseq = redund->matseq;
  }
  ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
  ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX) {
    /* create a supporting struct and attach it to C for reuse */
    ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
    if (subsize == 1) {
      Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
      c->redundant = redund;
    } else {
      Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
      c->redundant = redund;
    }
    redund->isrow    = isrow;
    redund->iscol    = iscol;
    redund->matseq   = matseq;
    redund->psubcomm = psubcomm;
    redund->Destroy               = (*matredundant)->ops->destroy;
    (*matredundant)->ops->destroy = MatDestroy_MatRedundant;
  }
  PetscFunctionReturn(0);
}
2915 
2916 #undef __FUNCT__
2917 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2918 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2919 {
2920   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2921   PetscErrorCode ierr;
2922   PetscInt       i,*idxb = 0;
2923   PetscScalar    *va,*vb;
2924   Vec            vtmp;
2925 
2926   PetscFunctionBegin;
2927   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2928   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2929   if (idx) {
2930     for (i=0; i<A->rmap->n; i++) {
2931       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2932     }
2933   }
2934 
2935   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2936   if (idx) {
2937     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2938   }
2939   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2940   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2941 
2942   for (i=0; i<A->rmap->n; i++) {
2943     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2944       va[i] = vb[i];
2945       if (idx) idx[i] = a->garray[idxb[i]];
2946     }
2947   }
2948 
2949   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2950   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2951   ierr = PetscFree(idxb);CHKERRQ(ierr);
2952   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2953   PetscFunctionReturn(0);
2954 }
2955 
2956 #undef __FUNCT__
2957 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2958 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2959 {
2960   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2961   PetscErrorCode ierr;
2962   PetscInt       i,*idxb = 0;
2963   PetscScalar    *va,*vb;
2964   Vec            vtmp;
2965 
2966   PetscFunctionBegin;
2967   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2968   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2969   if (idx) {
2970     for (i=0; i<A->cmap->n; i++) {
2971       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2972     }
2973   }
2974 
2975   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2976   if (idx) {
2977     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2978   }
2979   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2980   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2981 
2982   for (i=0; i<A->rmap->n; i++) {
2983     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2984       va[i] = vb[i];
2985       if (idx) idx[i] = a->garray[idxb[i]];
2986     }
2987   }
2988 
2989   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2990   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2991   ierr = PetscFree(idxb);CHKERRQ(ierr);
2992   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 #undef __FUNCT__
2997 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2998 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2999 {
3000   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3001   PetscInt       n      = A->rmap->n;
3002   PetscInt       cstart = A->cmap->rstart;
3003   PetscInt       *cmap  = mat->garray;
3004   PetscInt       *diagIdx, *offdiagIdx;
3005   Vec            diagV, offdiagV;
3006   PetscScalar    *a, *diagA, *offdiagA;
3007   PetscInt       r;
3008   PetscErrorCode ierr;
3009 
3010   PetscFunctionBegin;
3011   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3012   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr);
3013   ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr);
3014   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3015   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3016   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3017   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3018   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3019   for (r = 0; r < n; ++r) {
3020     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
3021       a[r]   = diagA[r];
3022       idx[r] = cstart + diagIdx[r];
3023     } else {
3024       a[r]   = offdiagA[r];
3025       idx[r] = cmap[offdiagIdx[r]];
3026     }
3027   }
3028   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3029   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3030   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3031   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3032   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3033   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 
3037 #undef __FUNCT__
3038 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
3039 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
3040 {
3041   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
3042   PetscInt       n      = A->rmap->n;
3043   PetscInt       cstart = A->cmap->rstart;
3044   PetscInt       *cmap  = mat->garray;
3045   PetscInt       *diagIdx, *offdiagIdx;
3046   Vec            diagV, offdiagV;
3047   PetscScalar    *a, *diagA, *offdiagA;
3048   PetscInt       r;
3049   PetscErrorCode ierr;
3050 
3051   PetscFunctionBegin;
3052   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3053   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3054   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3055   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3056   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3057   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3058   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3059   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3060   for (r = 0; r < n; ++r) {
3061     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3062       a[r]   = diagA[r];
3063       idx[r] = cstart + diagIdx[r];
3064     } else {
3065       a[r]   = offdiagA[r];
3066       idx[r] = cmap[offdiagIdx[r]];
3067     }
3068   }
3069   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3070   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3071   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3072   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3073   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3074   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3080 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3081 {
3082   PetscErrorCode ierr;
3083   Mat            *dummy;
3084 
3085   PetscFunctionBegin;
3086   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3087   *newmat = *dummy;
3088   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3094 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3095 {
3096   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3097   PetscErrorCode ierr;
3098 
3099   PetscFunctionBegin;
3100   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3101   PetscFunctionReturn(0);
3102 }
3103 
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3106 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3107 {
3108   PetscErrorCode ierr;
3109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3110 
3111   PetscFunctionBegin;
3112   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3113   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3114   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3115   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 /* -------------------------------------------------------------------*/
/*
   Function dispatch table for MATMPIAIJ.  Entries are positional: slot k
   implements the k-th MatOperation (the numbered markers label every fifth
   slot to keep positions readable), and a 0 entry means the operation is
   not provided by this type.  NOTE(review): the PETSC_HAVE_PBGL branches
   below currently select 0 in several slots on both sides of the #if --
   presumably remnants of removed PBGL support; confirm before cleaning up.
   Do not reorder or remove entries without consulting MatOperation.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       MatSolve_MPIAIJ,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                /*24*/ MatZeroRows_MPIAIJ,
                                       0,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIAIJ,
#if defined(PETSC_HAVE_PBGL)
                                       0,
#else
                                       0,
#endif
                                       0,
                                       0,
                                       0,
                                /*34*/ MatDuplicate_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIAIJ,
                                       MatGetSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       0,
                                       0,
                                       MatZeroRowsColumns_MPIAIJ,
                                /*49*/ MatSetRandom_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       0,
                                /*59*/ MatGetSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       0,
                                       MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
                                /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       0,
                                       MatSetColoring_MPIAIJ,
                                       0,
                                       MatSetValuesAdifor_MPIAIJ,
                                /*75*/ MatFDColoringApply_AIJ,
                                       0,
                                       0,
                                       0,
                                       MatFindZeroDiagonals_MPIAIJ,
                                /*80*/ 0,
                                       0,
                                       0,
                                /*83*/ MatLoad_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
                                       MatMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAP_MPIAIJ_MPIAIJ,
                                       MatPtAPSymbolic_MPIAIJ_MPIAIJ,
                                /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       MatConjugate_MPIAIJ,
                                       0,
                                /*104*/MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       MatGetRedundantMatrix_MPIAIJ,
                                       MatGetRowMin_MPIAIJ,
                                       0,
                                       0,
                                /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIAIJ,
                                /*124*/MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnNorms_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       0,
                                       MatGetSubMatricesParallel_MPIAIJ,
                                /*129*/0,
                                       MatTransposeMatMult_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       0,
                                /*134*/0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*139*/0,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ
};
3276 
3277 /* ----------------------------------------------------------------------------------------*/
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3281 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3282 {
3283   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3284   PetscErrorCode ierr;
3285 
3286   PetscFunctionBegin;
3287   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3288   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 #undef __FUNCT__
3293 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3294 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3295 {
3296   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3297   PetscErrorCode ierr;
3298 
3299   PetscFunctionBegin;
3300   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3301   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3302   PetscFunctionReturn(0);
3303 }
3304 
3305 #undef __FUNCT__
3306 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3307 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3308 {
3309   Mat_MPIAIJ     *b;
3310   PetscErrorCode ierr;
3311 
3312   PetscFunctionBegin;
3313   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3314   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3315   b = (Mat_MPIAIJ*)B->data;
3316 
3317   if (!B->preallocated) {
3318     /* Explicitly create 2 MATSEQAIJ matrices. */
3319     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3320     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3321     ierr = MatSetBlockSizes(b->A,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3322     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3323     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3324     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3325     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3326     ierr = MatSetBlockSizes(b->B,B->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
3327     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3328     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3329   }
3330 
3331   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3332   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3333   B->preallocated = PETSC_TRUE;
3334   PetscFunctionReturn(0);
3335 }
3336 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIAIJ"
/*
   Duplicates a MATMPIAIJ matrix: builds a new matrix of the same type and
   layout, copies the internal bookkeeping (colmap, garray, communication
   scatter), and duplicates both sequential blocks.  cpvalues controls
   whether the numerical values are copied (it is forwarded to the block
   duplications).  The statement order below matters: MatSetType() creates
   the Mat_MPIAIJ data that is filled in afterwards.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  *newmat = 0;
  /* create the shell of the new matrix with identical sizes and type */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetBlockSizes(mat,matin->rmap->bs,matin->cmap->bs);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  /* copy the full dispatch table (the source may have overridden entries) */
  ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  a       = (Mat_MPIAIJ*)mat->data;

  /* carry over assembly/factorization state flags */
  mat->factortype   = matin->factortype;
  mat->rmap->bs     = matin->rmap->bs;
  mat->cmap->bs     = matin->cmap->bs;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call MatGetRow() scratch state is not copied; it is rebuilt lazily */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;

  /* share the layout objects by reference instead of re-deriving them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  /* colmap: global column -> local off-diagonal column lookup */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray: local off-diagonal column -> global column lookup */
  if (oldmat->garray) {
    PetscInt len;
    len  = oldmat->B->cmap->n;
    ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
  } else a->garray = 0;

  /* duplicate the off-process communication machinery and both blocks */
  ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3401 
3402 
3403 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIAIJ"
/*
   Loads a MATMPIAIJ matrix from a PETSc binary viewer.  Only rank 0 reads
   the file; it computes per-process nonzero counts and ships row lengths,
   column indices and values to the other ranks with MPIULong_Send(), which
   each rank receives with MPIULong_Recv().  The send/receive pairing below
   is therefore order-critical.  If the caller pre-set sizes on newMat they
   are validated against the file header; otherwise sizes are taken from
   the file (optionally honoring -matload_block_size).
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscScalar    *vals,*svals;
  MPI_Comm       comm;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
  PetscInt       cend,cstart,n,*rowners,sizesset=1;
  int            fd;
  PetscInt       bs = 1;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only rank 0 touches the file; header = classid, rows, cols, total nz */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
  }

  ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQAIJ matrix","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* sizesset == 0 means the caller left all sizes undetermined */
  if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];
  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
  if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  /* determine ownership of all (block) rows */
  if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
  if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
  else m = newMat->rmap->n; /* Set by user */

  /* rowners[i] becomes the first global row owned by process i */
  ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* First process needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax, rowners[i]);
    }
  } else mmax = -1;             /* unused, but compilers complain */

  /* prefix-sum the local row counts into ownership offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) {
    rowners[i] += rowners[i-1];
  }
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += ourlens[j];
    }
    for (i=1; i<size; i++) {
      ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<rowners[i+1]-rowners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz   = procsnz[0];
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);

    /* read in every one elses and ship off */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += ourlens[i];
    }
    ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);

    /* receive message of column indices*/
    ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
  }

  /* determine column ownership if matrix is not square */
  if (N != M) {
    if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
    else n = newMat->cmap->n;
    ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    cstart = cend - n;
  } else {
    cstart = rstart;
    cend   = rend;
    n      = cend - cstart;
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
  jj   = 0;
  for (i=0; i<m; i++) {
    for (j=0; j<ourlens[i]; j++) {
      if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
      jj++;
    }
  }

  /* temporarily shrink ourlens to the diagonal-block counts for preallocation */
  for (i=0; i<m; i++) {
    ourlens[i] -= offlens[i];
  }
  if (!sizesset) {
    ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
  }

  if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}

  ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);

  /* restore ourlens to full row lengths for the insertion loops below */
  for (i=0; i<m; i++) {
    ourlens[i] += offlens[i];
  }

  if (!rank) {
    ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);

    /* read in my part of the matrix numerical values  */
    nz   = procsnz[0];
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }

    /* read in other processors and ship out */
    for (i=1; i<size; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);

    /* receive message of values*/
    ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj      = rstart;
    smycols = mycols;
    svals   = vals;
    for (i=0; i<m; i++) {
      ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
      smycols += ourlens[i];
      svals   += ourlens[i];
      jj++;
    }
  }
  ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
  ierr = PetscFree(vals);CHKERRQ(ierr);
  ierr = PetscFree(mycols);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3613 
3614 #undef __FUNCT__
3615 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3616 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3617 {
3618   PetscErrorCode ierr;
3619   IS             iscol_local;
3620   PetscInt       csize;
3621 
3622   PetscFunctionBegin;
3623   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3624   if (call == MAT_REUSE_MATRIX) {
3625     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3626     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3627   } else {
3628     PetscInt cbs;
3629     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3630     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3631     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3632   }
3633   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3634   if (call == MAT_INITIAL_MATRIX) {
3635     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3636     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3637   }
3638   PetscFunctionReturn(0);
3639 }
3640 
3641 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3642 #undef __FUNCT__
3643 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3644 /*
3645     Not great since it makes two copies of the submatrix, first an SeqAIJ
3646   in local and then by concatenating the local matrices the end result.
3647   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3648 
3649   Note: This requires a sequential iscol with all indices.
3650 */
PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
  PetscBool      allcolumns, colflag;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  Mat_SeqAIJ     *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* If iscol is the identity over ALL global columns, flag it so the extraction
     routine can skip per-column selection work entirely */
  ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
  ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
  if (colflag && ncol == mat->cmap->N) {
    allcolumns = PETSC_TRUE;
  } else {
    allcolumns = PETSC_FALSE;
  }
  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential submatrix built by the MAT_INITIAL_MATRIX call was stashed on *newmat */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* raw CSR structure of the local sequential submatrix */
    aij = (Mat_SeqAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread the columns as evenly as possible; the first (n % size) ranks get one extra */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column ownership */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    /* single allocation: dlens occupies the first m slots, olens the next m */
    ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
    olens = dlens + m;
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      /* classify each nonzero of row i as diagonal (within [rstart,rend)) or off-diagonal */
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree(dlens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    /* reuse path: sanity-check that the supplied matrix still matches the request */
    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* copy the local sequential submatrix row by row into the parallel result */
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz;
    ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3777 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3778 {
3779   PetscInt       m,cstart, cend,j,nnz,i,d;
3780   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3781   const PetscInt *JJ;
3782   PetscScalar    *values;
3783   PetscErrorCode ierr;
3784 
3785   PetscFunctionBegin;
3786   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3787 
3788   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3789   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3790   m      = B->rmap->n;
3791   cstart = B->cmap->rstart;
3792   cend   = B->cmap->rend;
3793   rstart = B->rmap->rstart;
3794 
3795   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3796 
3797 #if defined(PETSC_USE_DEBUGGING)
3798   for (i=0; i<m; i++) {
3799     nnz = Ii[i+1]- Ii[i];
3800     JJ  = J + Ii[i];
3801     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3802     if (nnz && (JJ[0] < 0)) SETERRRQ1(PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,j);
3803     if (nnz && (JJ[nnz-1] >= B->cmap->N) SETERRRQ3(PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3804   }
3805 #endif
3806 
3807   for (i=0; i<m; i++) {
3808     nnz     = Ii[i+1]- Ii[i];
3809     JJ      = J + Ii[i];
3810     nnz_max = PetscMax(nnz_max,nnz);
3811     d       = 0;
3812     for (j=0; j<nnz; j++) {
3813       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3814     }
3815     d_nnz[i] = d;
3816     o_nnz[i] = nnz - d;
3817   }
3818   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3819   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3820 
3821   if (v) values = (PetscScalar*)v;
3822   else {
3823     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3824   }
3825 
3826   for (i=0; i<m; i++) {
3827     ii   = i + rstart;
3828     nnz  = Ii[i+1]- Ii[i];
3829     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3830   }
3831   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3832   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3833 
3834   if (!v) {
3835     ierr = PetscFree(values);CHKERRQ(ierr);
3836   }
3837   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 #undef __FUNCT__
3842 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3843 /*@
3844    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3845    (the default parallel PETSc format).
3846 
3847    Collective on MPI_Comm
3848 
3849    Input Parameters:
3850 +  B - the matrix
3851 .  i - the indices into j for the start of each local row (starts with zero)
3852 .  j - the column indices for each local row (starts with zero)
3853 -  v - optional values in the matrix
3854 
3855    Level: developer
3856 
3857    Notes:
3858        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3859      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3860      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3861 
3862        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3863 
3864        The format which is used for the sparse matrix input, is equivalent to a
3865     row-major ordering.. i.e for the following matrix, the input data expected is
3866     as shown:
3867 
3868         1 0 0
3869         2 0 3     P0
3870        -------
3871         4 5 6     P1
3872 
3873      Process0 [P0]: rows_owned=[0,1]
3874         i =  {0,1,3}  [size = nrow+1  = 2+1]
3875         j =  {0,0,2}  [size = nz = 6]
3876         v =  {1,2,3}  [size = nz = 6]
3877 
3878      Process1 [P1]: rows_owned=[2]
3879         i =  {0,3}    [size = nrow+1  = 1+1]
3880         j =  {0,1,2}  [size = nz = 6]
3881         v =  {4,5,6}  [size = nz = 6]
3882 
3883 .keywords: matrix, aij, compressed row, sparse, parallel
3884 
3885 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3886           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3887 @*/
PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the type-specific implementation; a no-op if B's type does not register one */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3896 
3897 #undef __FUNCT__
3898 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3899 /*@C
3900    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3901    (the default parallel PETSc format).  For good matrix assembly performance
3902    the user should preallocate the matrix storage by setting the parameters
3903    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3904    performance can be increased by more than a factor of 50.
3905 
3906    Collective on MPI_Comm
3907 
3908    Input Parameters:
3909 +  A - the matrix
3910 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3911            (same value is used for all local rows)
3912 .  d_nnz - array containing the number of nonzeros in the various rows of the
3913            DIAGONAL portion of the local submatrix (possibly different for each row)
3914            or NULL, if d_nz is used to specify the nonzero structure.
3915            The size of this array is equal to the number of local rows, i.e 'm'.
3916            For matrices that will be factored, you must leave room for (and set)
3917            the diagonal entry even if it is zero.
3918 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3919            submatrix (same value is used for all local rows).
3920 -  o_nnz - array containing the number of nonzeros in the various rows of the
3921            OFF-DIAGONAL portion of the local submatrix (possibly different for
3922            each row) or NULL, if o_nz is used to specify the nonzero
3923            structure. The size of this array is equal to the number
3924            of local rows, i.e 'm'.
3925 
3926    If the *_nnz parameter is given then the *_nz parameter is ignored
3927 
3928    The AIJ format (also called the Yale sparse matrix format or
3929    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3930    storage.  The stored row and column indices begin with zero.
3931    See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3932 
3933    The parallel matrix is partitioned such that the first m0 rows belong to
3934    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3935    to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
3936 
3937    The DIAGONAL portion of the local submatrix of a processor can be defined
3938    as the submatrix which is obtained by extraction the part corresponding to
3939    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3940    first row that belongs to the processor, r2 is the last row belonging to
3941    the this processor, and c1-c2 is range of indices of the local part of a
3942    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3943    common case of a square matrix, the row and column ranges are the same and
3944    the DIAGONAL part is also square. The remaining portion of the local
3945    submatrix (mxN) constitute the OFF-DIAGONAL portion.
3946 
3947    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3948 
3949    You can call MatGetInfo() to get information on how effective the preallocation was;
3950    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3951    You can also run with the option -info and look for messages with the string
3952    malloc in them to see if additional memory allocation was needed.
3953 
3954    Example usage:
3955 
3956    Consider the following 8x8 matrix with 34 non-zero values, that is
3957    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
3958    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3959    as follows:
3960 
3961 .vb
3962             1  2  0  |  0  3  0  |  0  4
3963     Proc0   0  5  6  |  7  0  0  |  8  0
3964             9  0 10  | 11  0  0  | 12  0
3965     -------------------------------------
3966            13  0 14  | 15 16 17  |  0  0
3967     Proc1   0 18  0  | 19 20 21  |  0  0
3968             0  0  0  | 22 23  0  | 24  0
3969     -------------------------------------
3970     Proc2  25 26 27  |  0  0 28  | 29  0
3971            30  0  0  | 31 32 33  |  0 34
3972 .ve
3973 
3974    This can be represented as a collection of submatrices as:
3975 
3976 .vb
3977       A B C
3978       D E F
3979       G H I
3980 .ve
3981 
3982    Where the submatrices A,B,C are owned by proc0, D,E,F are
3983    owned by proc1, G,H,I are owned by proc2.
3984 
3985    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3986    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3987    The 'M','N' parameters are 8,8, and have the same values on all procs.
3988 
3989    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3990    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3991    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3992    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3993    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
3994    matrix, and [DF] as another SeqAIJ matrix.
3995 
3996    When d_nz, o_nz parameters are specified, d_nz storage elements are
3997    allocated for every row of the local diagonal submatrix, and o_nz
3998    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3999    One way to choose d_nz and o_nz is to use the max nonzerors per local
4000    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4001    In this case, the values of d_nz,o_nz are:
4002 .vb
4003      proc0 : dnz = 2, o_nz = 2
4004      proc1 : dnz = 3, o_nz = 2
4005      proc2 : dnz = 1, o_nz = 4
4006 .ve
4007    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4008    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4009    for proc2. i.e we are using 12+15+10=37 storage locations to store
4010    34 values.
4011 
4012    When d_nnz, o_nnz parameters are specified, the storage is specified
4013    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4014    In the above case the values for d_nnz,o_nnz are:
4015 .vb
4016      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4017      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4018      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4019 .ve
4020    Here the space allocated is sum of all the above values i.e 34, and
4021    hence pre-allocation is perfect.
4022 
4023    Level: intermediate
4024 
4025 .keywords: matrix, aij, compressed row, sparse, parallel
4026 
4027 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4028           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
4029 @*/
PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate B before dispatching; see the manual page above for argument semantics */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* dispatch to the type-specific implementation; a no-op if B's type does not register one */
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4040 
4041 #undef __FUNCT__
4042 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
4043 /*@
4044      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
4045          CSR format the local rows.
4046 
4047    Collective on MPI_Comm
4048 
4049    Input Parameters:
4050 +  comm - MPI communicator
4051 .  m - number of local rows (Cannot be PETSC_DECIDE)
4052 .  n - This value should be the same as the local size used in creating the
4053        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4054        calculated if N is given) For square matrices n is almost always m.
4055 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4056 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4057 .   i - row indices
4058 .   j - column indices
4059 -   a - matrix values
4060 
4061    Output Parameter:
4062 .   mat - the matrix
4063 
4064    Level: intermediate
4065 
4066    Notes:
4067        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4068      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4069      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4070 
4071        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4072 
4073        The format which is used for the sparse matrix input, is equivalent to a
4074     row-major ordering.. i.e for the following matrix, the input data expected is
4075     as shown:
4076 
4077         1 0 0
4078         2 0 3     P0
4079        -------
4080         4 5 6     P1
4081 
4082      Process0 [P0]: rows_owned=[0,1]
4083         i =  {0,1,3}  [size = nrow+1  = 2+1]
4084         j =  {0,0,2}  [size = nz = 6]
4085         v =  {1,2,3}  [size = nz = 6]
4086 
4087      Process1 [P1]: rows_owned=[2]
4088         i =  {0,3}    [size = nrow+1  = 1+1]
4089         j =  {0,1,2}  [size = nz = 6]
4090         v =  {4,5,6}  [size = nz = 6]
4091 
4092 .keywords: matrix, aij, compressed row, sparse, parallel
4093 
4094 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4095           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4096 @*/
4097 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4098 {
4099   PetscErrorCode ierr;
4100 
4101   PetscFunctionBegin;
4102   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4103   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4104   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4105   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4106   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4107   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4108   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4109   PetscFunctionReturn(0);
4110 }
4111 
4112 #undef __FUNCT__
4113 #define __FUNCT__ "MatCreateAIJ"
4114 /*@C
4115    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4116    (the default parallel PETSc format).  For good matrix assembly performance
4117    the user should preallocate the matrix storage by setting the parameters
4118    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4119    performance can be increased by more than a factor of 50.
4120 
4121    Collective on MPI_Comm
4122 
4123    Input Parameters:
4124 +  comm - MPI communicator
4125 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4126            This value should be the same as the local size used in creating the
4127            y vector for the matrix-vector product y = Ax.
4128 .  n - This value should be the same as the local size used in creating the
4129        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4130        calculated if N is given) For square matrices n is almost always m.
4131 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4132 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4133 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4134            (same value is used for all local rows)
4135 .  d_nnz - array containing the number of nonzeros in the various rows of the
4136            DIAGONAL portion of the local submatrix (possibly different for each row)
4137            or NULL, if d_nz is used to specify the nonzero structure.
4138            The size of this array is equal to the number of local rows, i.e 'm'.
4139 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4140            submatrix (same value is used for all local rows).
4141 -  o_nnz - array containing the number of nonzeros in the various rows of the
4142            OFF-DIAGONAL portion of the local submatrix (possibly different for
4143            each row) or NULL, if o_nz is used to specify the nonzero
4144            structure. The size of this array is equal to the number
4145            of local rows, i.e 'm'.
4146 
4147    Output Parameter:
4148 .  A - the matrix
4149 
4150    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4151    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4152    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4153 
4154    Notes:
4155    If the *_nnz parameter is given then the *_nz parameter is ignored
4156 
4157    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4158    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4159    storage requirements for this matrix.
4160 
4161    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4162    processor than it must be used on all processors that share the object for
4163    that argument.
4164 
4165    The user MUST specify either the local or global matrix dimensions
4166    (possibly both).
4167 
4168    The parallel matrix is partitioned across processors such that the
4169    first m0 rows belong to process 0, the next m1 rows belong to
4170    process 1, the next m2 rows belong to process 2 etc.. where
4171    m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
4172    values corresponding to [m x N] submatrix.
4173 
4174    The columns are logically partitioned with the n0 columns belonging
4175    to 0th partition, the next n1 columns belonging to the next
4176    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4177 
4178    The DIAGONAL portion of the local submatrix on any given processor
4179    is the submatrix corresponding to the rows and columns m,n
4180    corresponding to the given processor. i.e diagonal matrix on
4181    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4182    etc. The remaining portion of the local submatrix [m x (N-n)]
4183    constitute the OFF-DIAGONAL portion. The example below better
4184    illustrates this concept.
4185 
4186    For a square global matrix we define each processor's diagonal portion
4187    to be its local rows and the corresponding columns (a square submatrix);
4188    each processor's off-diagonal portion encompasses the remainder of the
4189    local matrix (a rectangular submatrix).
4190 
4191    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4192 
4193    When calling this routine with a single process communicator, a matrix of
4194    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4195    type of communicator, use the construction mechanism:
4196      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4197 
4198    By default, this format uses inodes (identical nodes) when possible.
4199    We search for consecutive rows with the same nonzero structure, thereby
4200    reusing matrix information to achieve increased efficiency.
4201 
4202    Options Database Keys:
4203 +  -mat_no_inode  - Do not use inodes
4204 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4205 -  -mat_aij_oneindex - Internally use indexing starting at 1
4206         rather than 0.  Note that when calling MatSetValues(),
4207         the user still MUST index entries starting at 0!
4208 
4209 
4210    Example usage:
4211 
4212    Consider the following 8x8 matrix with 34 non-zero values, that is
4213    assembled across 3 processors. Lets assume that proc0 owns 3 rows,
4214    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4215    as follows:
4216 
4217 .vb
4218             1  2  0  |  0  3  0  |  0  4
4219     Proc0   0  5  6  |  7  0  0  |  8  0
4220             9  0 10  | 11  0  0  | 12  0
4221     -------------------------------------
4222            13  0 14  | 15 16 17  |  0  0
4223     Proc1   0 18  0  | 19 20 21  |  0  0
4224             0  0  0  | 22 23  0  | 24  0
4225     -------------------------------------
4226     Proc2  25 26 27  |  0  0 28  | 29  0
4227            30  0  0  | 31 32 33  |  0 34
4228 .ve
4229 
4230    This can be represented as a collection of submatrices as:
4231 
4232 .vb
4233       A B C
4234       D E F
4235       G H I
4236 .ve
4237 
4238    Where the submatrices A,B,C are owned by proc0, D,E,F are
4239    owned by proc1, G,H,I are owned by proc2.
4240 
4241    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4242    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4243    The 'M','N' parameters are 8,8, and have the same values on all procs.
4244 
4245    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4246    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4247    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4248    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4249    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4250    matrix, and [DF] as another SeqAIJ matrix.
4251 
4252    When d_nz, o_nz parameters are specified, d_nz storage elements are
4253    allocated for every row of the local diagonal submatrix, and o_nz
4254    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4255    One way to choose d_nz and o_nz is to use the max nonzerors per local
4256    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4257    In this case, the values of d_nz,o_nz are:
4258 .vb
4259      proc0 : dnz = 2, o_nz = 2
4260      proc1 : dnz = 3, o_nz = 2
4261      proc2 : dnz = 1, o_nz = 4
4262 .ve
4263    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4264    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4265    for proc2. i.e we are using 12+15+10=37 storage locations to store
4266    34 values.
4267 
4268    When d_nnz, o_nnz parameters are specified, the storage is specified
4269    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4270    In the above case the values for d_nnz,o_nnz are:
4271 .vb
4272      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4273      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4274      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4275 .ve
4276    Here the space allocated is sum of all the above values i.e 34, and
4277    hence pre-allocation is perfect.
4278 
4279    Level: intermediate
4280 
4281 .keywords: matrix, aij, compressed row, sparse, parallel
4282 
4283 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4284           MPIAIJ, MatCreateMPIAIJWithArrays()
4285 @*/
4286 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4287 {
4288   PetscErrorCode ierr;
4289   PetscMPIInt    size;
4290 
4291   PetscFunctionBegin;
4292   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4293   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4294   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4295   if (size > 1) {
4296     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4297     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4298   } else {
4299     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4300     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4301   }
4302   PetscFunctionReturn(0);
4303 }
4304 
4305 #undef __FUNCT__
4306 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4307 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4308 {
4309   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4310 
4311   PetscFunctionBegin;
4312   *Ad     = a->A;
4313   *Ao     = a->B;
4314   *colmap = a->garray;
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4320 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4321 {
4322   PetscErrorCode ierr;
4323   PetscInt       i;
4324   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4325 
4326   PetscFunctionBegin;
4327   if (coloring->ctype == IS_COLORING_GLOBAL) {
4328     ISColoringValue *allcolors,*colors;
4329     ISColoring      ocoloring;
4330 
4331     /* set coloring for diagonal portion */
4332     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4333 
4334     /* set coloring for off-diagonal portion */
4335     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4336     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4337     for (i=0; i<a->B->cmap->n; i++) {
4338       colors[i] = allcolors[a->garray[i]];
4339     }
4340     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4341     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4342     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4343     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4344   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4345     ISColoringValue *colors;
4346     PetscInt        *larray;
4347     ISColoring      ocoloring;
4348 
4349     /* set coloring for diagonal portion */
4350     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4351     for (i=0; i<a->A->cmap->n; i++) {
4352       larray[i] = i + A->cmap->rstart;
4353     }
4354     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4355     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4356     for (i=0; i<a->A->cmap->n; i++) {
4357       colors[i] = coloring->colors[larray[i]];
4358     }
4359     ierr = PetscFree(larray);CHKERRQ(ierr);
4360     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4361     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4362     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4363 
4364     /* set coloring for off-diagonal portion */
4365     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4366     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4367     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4368     for (i=0; i<a->B->cmap->n; i++) {
4369       colors[i] = coloring->colors[larray[i]];
4370     }
4371     ierr = PetscFree(larray);CHKERRQ(ierr);
4372     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4373     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4374     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4375   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4376   PetscFunctionReturn(0);
4377 }
4378 
4379 #undef __FUNCT__
4380 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4381 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4382 {
4383   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4384   PetscErrorCode ierr;
4385 
4386   PetscFunctionBegin;
4387   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4388   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
/*
   MatCreateMPIAIJConcatenateSeqAIJSymbolic - Builds the nonzero structure
   (no values) of the MPIAIJ matrix obtained by stacking each process's
   sequential matrix inmat, one above the other, in rank order.  Each process
   contributes its m local rows; n is the number of local columns (or
   PETSC_DECIDE).  Values are filled in later by
   MatCreateMPIAIJConcatenateSeqAIJNumeric().
*/
PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
{
  PetscErrorCode ierr;
  PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
  PetscInt       *indx;

  PetscFunctionBegin;
  /* This routine will ONLY return MPIAIJ type matrix */
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
  if (n == PETSC_DECIDE) {
    ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
  }
  /* Check sum(n) = N */
  ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);

  /* first global row of this process = sum of local row counts of lower ranks */
  ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
  rstart -= m;

  /* count diagonal/off-diagonal nonzeros per local row for preallocation
     (MatPreallocateInitialize/Finalize is a macro pair; dnz/onz are allocated
     and freed by the macros) */
  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
    ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
  }

  ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
  ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4429 
4430 #undef __FUNCT__
4431 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4432 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4433 {
4434   PetscErrorCode ierr;
4435   PetscInt       m,N,i,rstart,nnz,Ii;
4436   PetscInt       *indx;
4437   PetscScalar    *values;
4438 
4439   PetscFunctionBegin;
4440   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4441   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4442   for (i=0; i<m; i++) {
4443     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4444     Ii   = i + rstart;
4445     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4446     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4447   }
4448   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4449   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4450   PetscFunctionReturn(0);
4451 }
4452 
4453 #undef __FUNCT__
4454 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4455 /*@
4456       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4457                  matrices from each processor
4458 
4459     Collective on MPI_Comm
4460 
4461    Input Parameters:
4462 +    comm - the communicators the parallel matrix will live on
4463 .    inmat - the input sequential matrices
4464 .    n - number of local columns (or PETSC_DECIDE)
4465 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4466 
4467    Output Parameter:
4468 .    outmat - the parallel matrix generated
4469 
4470     Level: advanced
4471 
4472    Notes: The number of columns of the matrix in EACH processor MUST be the same.
4473 
4474 @*/
4475 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4476 {
4477   PetscErrorCode ierr;
4478   PetscMPIInt    size;
4479 
4480   PetscFunctionBegin;
4481   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4482   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4483   if (size == 1) {
4484     if (scall == MAT_INITIAL_MATRIX) {
4485       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4486     } else {
4487       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4488     }
4489   } else {
4490     if (scall == MAT_INITIAL_MATRIX) {
4491       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4492     }
4493     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4494   }
4495   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4496   PetscFunctionReturn(0);
4497 }
4498 
4499 #undef __FUNCT__
4500 #define __FUNCT__ "MatFileSplit"
4501 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4502 {
4503   PetscErrorCode    ierr;
4504   PetscMPIInt       rank;
4505   PetscInt          m,N,i,rstart,nnz;
4506   size_t            len;
4507   const PetscInt    *indx;
4508   PetscViewer       out;
4509   char              *name;
4510   Mat               B;
4511   const PetscScalar *values;
4512 
4513   PetscFunctionBegin;
4514   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4515   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4516   /* Should this be the type of the diagonal block of A? */
4517   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4518   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4519   ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
4520   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4521   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4522   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4523   for (i=0; i<m; i++) {
4524     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4525     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4526     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4527   }
4528   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4529   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4530 
4531   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4532   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4533   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4534   sprintf(name,"%s.%d",outfile,rank);
4535   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4536   ierr = PetscFree(name);CHKERRQ(ierr);
4537   ierr = MatView(B,out);CHKERRQ(ierr);
4538   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4539   ierr = MatDestroy(&B);CHKERRQ(ierr);
4540   PetscFunctionReturn(0);
4541 }
4542 
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
/*
   MatDestroy_MPIAIJ_SeqsToMPI - Destructor installed on matrices built by
   MatCreateMPIAIJSumSeqAIJSymbolic(): frees the Mat_Merge_SeqsToMPI support
   structure attached via the "MatMergeSeqsToMPI" container (if present),
   then hands off to the ordinary MPIAIJ destructor.
*/
PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
{
  PetscErrorCode      ierr;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  if (container) {
    ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
    /* free the communication buffers and merged ij-structure kept for value reuse;
       buf_ri[0]/buf_rj[0] hold the contiguous storage behind the pointer arrays */
    ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
    ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
    ierr = PetscFree(merge->bi);CHKERRQ(ierr);
    ierr = PetscFree(merge->bj);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
    ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
    ierr = PetscFree(merge->coi);CHKERRQ(ierr);
    ierr = PetscFree(merge->coj);CHKERRQ(ierr);
    ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
    ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
    ierr = PetscFree(merge);CHKERRQ(ierr);
    /* detach the container so it is destroyed with its last reference */
    ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
  }
  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4575 
4576 #include <../src/mat/utils/freespace.h>
4577 #include <petscbt.h>
4578 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
/*
   MatCreateMPIAIJSumSeqAIJNumeric - Fills in the numerical values of mpimat,
   the parallel matrix formed by summing each process's sequential matrix
   seqmat.  The symbolic structure and the communication pattern (stored in
   the Mat_Merge_SeqsToMPI container attached to mpimat) must already have
   been built by MatCreateMPIAIJSumSeqAIJSymbolic().
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
{
  PetscErrorCode      ierr;
  MPI_Comm            comm;
  Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  /* retrieve the merged ij-structure and communication pattern computed by the symbolic phase */
  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* the values of the rows owned by [proc] are contiguous in a->a starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    /* merge-walk: bj_i contains every column of aj (set up by the symbolic phase),
       so advance j through bj_i and accumulate whenever the columns match */
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for (j=0; nextaj<anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++; nextai[k]++;
      }
    }
    ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* abuf_r[0] is the contiguous storage behind the receive-buffer pointer array */
  ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
  ierr = PetscFree(abuf_r);CHKERRQ(ierr);
  ierr = PetscFree(ba_i);CHKERRQ(ierr);
  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4697 
4698 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4699 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
/*
   MatCreateMPIAIJSumSeqAIJSymbolic - Computes the nonzero structure of the
   MPIAIJ matrix obtained by summing each process's sequential matrix seqmat,
   with m local rows and n local columns per process (or PETSC_DECIDE).

   Each process sends the column indices (j-structure) and row layout
   (i-structure) of the rows it does not own to the owning process, merges
   the received rows with its own using a linked list, and preallocates the
   parallel result.  The communication buffers and merged ij-structure are
   stashed in a Mat_Merge_SeqsToMPI container attached to the result so that
   MatCreateMPIAIJSumSeqAIJNumeric() can fill in values (repeatedly) without
   redoing this work; MatDestroy_MPIAIJ_SeqsToMPI() frees it.
*/
PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
{
  PetscErrorCode      ierr;
  Mat                 B_mpi;
  Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
  PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
  PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
  PetscInt            len,proc,*dnz,*onz,bs,cbs;
  PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
  PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
  MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList  free_space=NULL,current_space=NULL;
  PetscBT             lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);

  /* make sure it is a PETSc comm */
  ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscNew(&merge);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);

  /* determine row ownership */
  /*---------------------------------------------------------*/
  ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
  ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
  ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  /*---------------------------------------------------------*/
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (proc=0; proc<size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      /* nothing is sent to ourselves */
      len_s[proc] = 0;
    } else {
      len_si[proc] = owners[proc+1] - owners[proc] + 1;
      len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero appear in the i-structure message */
      nrows = 0;
      for (i=owners[proc]; i<owners[proc+1]; i++) {
        if (ai[i+1] > ai[i]) nrows++;
      }
      len_si[proc] = 2*(nrows+1);
      len         += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  /*-------------------------------------------------------------------------*/
  ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
  ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);

  /* post the Irecv of j-structure */
  /*-------------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);

  /* post the Isend of j-structure */
  /*--------------------------------*/
  ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);

  for (proc=0, k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* column indices of [proc]'s rows are contiguous in aj starting at ai[owners[proc]] */
    i    = owners[proc];
    ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
    k++;
  }

  /* receives and sends of j-structure are complete */
  /*------------------------------------------------*/
  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}

  /* send and recv i-structure */
  /*---------------------------*/
  ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
  ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);

  ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
  buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    /*-------------------------------------------*/
    nrows       = len_si[proc]/2 - 1;
    buf_si_i    = buf_si + nrows+1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i=owners[proc]; i<owners[proc+1]; i++) {
      anzi = ai[i+1] - ai[i];
      if (anzi) {
        buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows+1]   = i-owners[proc]; /* local row index */
        nrows++;
      }
    }
    ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}

  ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
  for (i=0; i<merge->nrecv; i++) {
    ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
  }

  ierr = PetscFree(len_si);CHKERRQ(ierr);
  ierr = PetscFree(len_ri);CHKERRQ(ierr);
  ierr = PetscFree(rj_waits);CHKERRQ(ierr);
  ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
  ierr = PetscFree(ri_waits);CHKERRQ(ierr);
  ierr = PetscFree(buf_s);CHKERRQ(ierr);
  ierr = PetscFree(status);CHKERRQ(ierr);

  /* compute a local seq matrix in each processor */
  /*----------------------------------------------*/
  /* allocate bi array and free space for accumulating nonzero column info */
  ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N+1;
  ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len  = ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow  = owners[rank] + i;
    anzi  = ai[arow+1] - ai[arow];
    aj    = a->j + ai[arow];
    ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi  = *(nextai[k]+1) - *nextai[k];
        aj    = buf_rj[k] + *nextai[k];
        ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi;  /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  /* flatten the free-space chunks into the final bj array */
  ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  /* drop the reference taken by PetscCommDuplicate() above */
  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
4952 
4953 #undef __FUNCT__
4954 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4955 /*@C
4956       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4957                  matrices from each processor
4958 
4959     Collective on MPI_Comm
4960 
4961    Input Parameters:
4962 +    comm - the communicators the parallel matrix will live on
4963 .    seqmat - the input sequential matrices
4964 .    m - number of local rows (or PETSC_DECIDE)
4965 .    n - number of local columns (or PETSC_DECIDE)
4966 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4967 
4968    Output Parameter:
4969 .    mpimat - the parallel matrix generated
4970 
4971     Level: advanced
4972 
4973    Notes:
4974      The dimensions of the sequential matrix in each processor MUST be the same.
4975      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4976      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4977 @*/
4978 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4979 {
4980   PetscErrorCode ierr;
4981   PetscMPIInt    size;
4982 
4983   PetscFunctionBegin;
4984   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4985   if (size == 1) {
4986     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4987     if (scall == MAT_INITIAL_MATRIX) {
4988       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4989     } else {
4990       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4991     }
4992     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4993     PetscFunctionReturn(0);
4994   }
4995   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4996   if (scall == MAT_INITIAL_MATRIX) {
4997     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4998   }
4999   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5000   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5001   PetscFunctionReturn(0);
5002 }
5003 
5004 #undef __FUNCT__
5005 #define __FUNCT__ "MatMPIAIJGetLocalMat"
5006 /*@
5007      MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MPIAIJ matrix by taking all its local rows and putting them into a sequential vector with
5008           mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5009           with MatGetSize()
5010 
5011     Not Collective
5012 
5013    Input Parameters:
5014 +    A - the matrix
5015 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5016 
5017    Output Parameter:
5018 .    A_loc - the local sequential matrix generated
5019 
5020     Level: developer
5021 
5022 .seealso: MatGetOwnerShipRange(), MatMPIAIJGetLocalMatCondensed()
5023 
5024 @*/
PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a=(Mat_SeqAIJ*)(mpimat->A)->data,*b=(Mat_SeqAIJ*)(mpimat->B)->data;
  PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j,*cmap=mpimat->garray; /* cmap maps B's local column indices to global column indices */
  MatScalar      *aa=a->a,*ba=b->a,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    /* Row pointers: each merged row holds the entries of the diagonal-block (A)
       row plus the entries of the off-diagonal-block (B) row */
    ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
    k    = 0;
    /* For each row, emit entries in ascending global column order:
       B-columns left of the diagonal block, then A-columns, then the rest of B */
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A: global columns < cstart */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A: shift local columns to global by cstart */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining global columns >= cstart */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ*)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* Sparsity pattern is assumed unchanged: copy only the numerical values,
       walking in exactly the same interleaved order used at creation */
    mat=(Mat_SeqAIJ*)(*A_loc)->data;
    ci = mat->i; cj = mat->j; cam = mat->a;
    for (i=0; i<am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i+1] - bi[i];
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++; bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i+1] - ai[i];
      for (j=0; j<ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        *cam++ = *ba++; bj++;
      }
    }
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
  ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5102 
5103 #undef __FUNCT__
5104 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5105 /*@C
5106      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5107 
5108     Not Collective
5109 
5110    Input Parameters:
5111 +    A - the matrix
5112 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5113 -    row, col - index sets of rows and columns to extract (or NULL)
5114 
5115    Output Parameter:
5116 .    A_loc - the local sequential matrix generated
5117 
5118     Level: developer
5119 
5120 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5121 
5122 @*/
5123 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5124 {
5125   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5126   PetscErrorCode ierr;
5127   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5128   IS             isrowa,iscola;
5129   Mat            *aloc;
5130   PetscBool      match;
5131 
5132   PetscFunctionBegin;
5133   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5134   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5135   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5136   if (!row) {
5137     start = A->rmap->rstart; end = A->rmap->rend;
5138     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5139   } else {
5140     isrowa = *row;
5141   }
5142   if (!col) {
5143     start = A->cmap->rstart;
5144     cmap  = a->garray;
5145     nzA   = a->A->cmap->n;
5146     nzB   = a->B->cmap->n;
5147     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5148     ncols = 0;
5149     for (i=0; i<nzB; i++) {
5150       if (cmap[i] < start) idx[ncols++] = cmap[i];
5151       else break;
5152     }
5153     imark = i;
5154     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5155     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5156     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5157   } else {
5158     iscola = *col;
5159   }
5160   if (scall != MAT_INITIAL_MATRIX) {
5161     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5162     aloc[0] = *A_loc;
5163   }
5164   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5165   *A_loc = aloc[0];
5166   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5167   if (!row) {
5168     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5169   }
5170   if (!col) {
5171     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5172   }
5173   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5174   PetscFunctionReturn(0);
5175 }
5176 
5177 #undef __FUNCT__
5178 #define __FUNCT__ "MatGetBrowsOfAcols"
5179 /*@C
5180     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
5181 
5182     Collective on Mat
5183 
5184    Input Parameters:
5185 +    A,B - the matrices in mpiaij format
5186 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5187 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5188 
5189    Output Parameter:
5190 +    rowb, colb - index sets of rows and columns of B to extract
5191 -    B_seq - the sequential matrix generated
5192 
5193     Level: developer
5194 
5195 @*/
5196 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5197 {
5198   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5199   PetscErrorCode ierr;
5200   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5201   IS             isrowb,iscolb;
5202   Mat            *bseq=NULL;
5203 
5204   PetscFunctionBegin;
5205   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5206     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5207   }
5208   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5209 
5210   if (scall == MAT_INITIAL_MATRIX) {
5211     start = A->cmap->rstart;
5212     cmap  = a->garray;
5213     nzA   = a->A->cmap->n;
5214     nzB   = a->B->cmap->n;
5215     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5216     ncols = 0;
5217     for (i=0; i<nzB; i++) {  /* row < local row index */
5218       if (cmap[i] < start) idx[ncols++] = cmap[i];
5219       else break;
5220     }
5221     imark = i;
5222     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5223     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5224     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5225     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5226   } else {
5227     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5228     isrowb  = *rowb; iscolb = *colb;
5229     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5230     bseq[0] = *B_seq;
5231   }
5232   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5233   *B_seq = bseq[0];
5234   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5235   if (!rowb) {
5236     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5237   } else {
5238     *rowb = isrowb;
5239   }
5240   if (!colb) {
5241     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5242   } else {
5243     *colb = iscolb;
5244   }
5245   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5246   PetscFunctionReturn(0);
5247 }
5248 
5249 #undef __FUNCT__
5250 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5251 /*
5252     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
5253     of the OFF-DIAGONAL portion of local A
5254 
5255     Collective on Mat
5256 
5257    Input Parameters:
5258 +    A,B - the matrices in mpiaij format
5259 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5260 
5261    Output Parameter:
5262 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5263 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5264 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5265 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5266 
5267     Level: developer
5268 
5269 */
5270 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5271 {
5272   VecScatter_MPI_General *gen_to,*gen_from;
5273   PetscErrorCode         ierr;
5274   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5275   Mat_SeqAIJ             *b_oth;
5276   VecScatter             ctx =a->Mvctx;
5277   MPI_Comm               comm;
5278   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5279   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5280   PetscScalar            *rvalues,*svalues;
5281   MatScalar              *b_otha,*bufa,*bufA;
5282   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5283   MPI_Request            *rwaits = NULL,*swaits = NULL;
5284   MPI_Status             *sstatus,rstatus;
5285   PetscMPIInt            jj;
5286   PetscInt               *cols,sbs,rbs;
5287   PetscScalar            *vals;
5288 
5289   PetscFunctionBegin;
5290   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5291   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5292     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5293   }
5294   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5295   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5296 
5297   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5298   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5299   rvalues  = gen_from->values; /* holds the length of receiving row */
5300   svalues  = gen_to->values;   /* holds the length of sending row */
5301   nrecvs   = gen_from->n;
5302   nsends   = gen_to->n;
5303 
5304   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5305   srow    = gen_to->indices;    /* local row index to be sent */
5306   sstarts = gen_to->starts;
5307   sprocs  = gen_to->procs;
5308   sstatus = gen_to->sstatus;
5309   sbs     = gen_to->bs;
5310   rstarts = gen_from->starts;
5311   rprocs  = gen_from->procs;
5312   rbs     = gen_from->bs;
5313 
5314   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5315   if (scall == MAT_INITIAL_MATRIX) {
5316     /* i-array */
5317     /*---------*/
5318     /*  post receives */
5319     for (i=0; i<nrecvs; i++) {
5320       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5321       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5322       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5323     }
5324 
5325     /* pack the outgoing message */
5326     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5327 
5328     sstartsj[0] = 0;
5329     rstartsj[0] = 0;
5330     len         = 0; /* total length of j or a array to be sent */
5331     k           = 0;
5332     for (i=0; i<nsends; i++) {
5333       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5334       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5335       for (j=0; j<nrows; j++) {
5336         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5337         for (l=0; l<sbs; l++) {
5338           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5339 
5340           rowlen[j*sbs+l] = ncols;
5341 
5342           len += ncols;
5343           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5344         }
5345         k++;
5346       }
5347       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5348 
5349       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5350     }
5351     /* recvs and sends of i-array are completed */
5352     i = nrecvs;
5353     while (i--) {
5354       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355     }
5356     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357 
5358     /* allocate buffers for sending j and a arrays */
5359     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5360     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5361 
5362     /* create i-array of B_oth */
5363     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5364 
5365     b_othi[0] = 0;
5366     len       = 0; /* total length of j or a array to be received */
5367     k         = 0;
5368     for (i=0; i<nrecvs; i++) {
5369       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5370       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be recieved */
5371       for (j=0; j<nrows; j++) {
5372         b_othi[k+1] = b_othi[k] + rowlen[j];
5373         len        += rowlen[j]; k++;
5374       }
5375       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5376     }
5377 
5378     /* allocate space for j and a arrrays of B_oth */
5379     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5380     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5381 
5382     /* j-array */
5383     /*---------*/
5384     /*  post receives of j-array */
5385     for (i=0; i<nrecvs; i++) {
5386       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5387       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5388     }
5389 
5390     /* pack the outgoing message j-array */
5391     k = 0;
5392     for (i=0; i<nsends; i++) {
5393       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5394       bufJ  = bufj+sstartsj[i];
5395       for (j=0; j<nrows; j++) {
5396         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5397         for (ll=0; ll<sbs; ll++) {
5398           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5399           for (l=0; l<ncols; l++) {
5400             *bufJ++ = cols[l];
5401           }
5402           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5403         }
5404       }
5405       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5406     }
5407 
5408     /* recvs and sends of j-array are completed */
5409     i = nrecvs;
5410     while (i--) {
5411       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5412     }
5413     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5414   } else if (scall == MAT_REUSE_MATRIX) {
5415     sstartsj = *startsj_s;
5416     rstartsj = *startsj_r;
5417     bufa     = *bufa_ptr;
5418     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5419     b_otha   = b_oth->a;
5420   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container");
5421 
5422   /* a-array */
5423   /*---------*/
5424   /*  post receives of a-array */
5425   for (i=0; i<nrecvs; i++) {
5426     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5427     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5428   }
5429 
5430   /* pack the outgoing message a-array */
5431   k = 0;
5432   for (i=0; i<nsends; i++) {
5433     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5434     bufA  = bufa+sstartsj[i];
5435     for (j=0; j<nrows; j++) {
5436       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5437       for (ll=0; ll<sbs; ll++) {
5438         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5439         for (l=0; l<ncols; l++) {
5440           *bufA++ = vals[l];
5441         }
5442         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5443       }
5444     }
5445     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5446   }
5447   /* recvs and sends of a-array are completed */
5448   i = nrecvs;
5449   while (i--) {
5450     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5451   }
5452   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5453   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5454 
5455   if (scall == MAT_INITIAL_MATRIX) {
5456     /* put together the new matrix */
5457     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5458 
5459     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5460     /* Since these are PETSc arrays, change flags to free them as necessary. */
5461     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5462     b_oth->free_a  = PETSC_TRUE;
5463     b_oth->free_ij = PETSC_TRUE;
5464     b_oth->nonew   = 0;
5465 
5466     ierr = PetscFree(bufj);CHKERRQ(ierr);
5467     if (!startsj_s || !bufa_ptr) {
5468       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5469       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5470     } else {
5471       *startsj_s = sstartsj;
5472       *startsj_r = rstartsj;
5473       *bufa_ptr  = bufa;
5474     }
5475   }
5476   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5477   PetscFunctionReturn(0);
5478 }
5479 
5480 #undef __FUNCT__
5481 #define __FUNCT__ "MatGetCommunicationStructs"
5482 /*@C
5483   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5484 
5485   Not Collective
5486 
5487   Input Parameters:
5488 . A - The matrix in mpiaij format
5489 
5490   Output Parameter:
5491 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5492 . colmap - A map from global column index to local index into lvec
5493 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5494 
5495   Level: developer
5496 
5497 @*/
5498 #if defined(PETSC_USE_CTABLE)
5499 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5500 #else
5501 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5502 #endif
5503 {
5504   Mat_MPIAIJ *a;
5505 
5506   PetscFunctionBegin;
5507   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5508   PetscValidPointer(lvec, 2);
5509   PetscValidPointer(colmap, 3);
5510   PetscValidPointer(multScatter, 4);
5511   a = (Mat_MPIAIJ*) A->data;
5512   if (lvec) *lvec = a->lvec;
5513   if (colmap) *colmap = a->colmap;
5514   if (multScatter) *multScatter = a->Mvctx;
5515   PetscFunctionReturn(0);
5516 }
5517 
5518 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5519 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5520 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5521 
5522 #undef __FUNCT__
5523 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5524 /*
5525     Computes (B'*A')' since computing B*A directly is untenable
5526 
5527                n                       p                          p
5528         (              )       (              )         (                  )
5529       m (      A       )  *  n (       B      )   =   m (         C        )
5530         (              )       (              )         (                  )
5531 
5532 */
5533 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5534 {
5535   PetscErrorCode ierr;
5536   Mat            At,Bt,Ct;
5537 
5538   PetscFunctionBegin;
5539   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5540   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5541   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5542   ierr = MatDestroy(&At);CHKERRQ(ierr);
5543   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5544   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5545   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5546   PetscFunctionReturn(0);
5547 }
5548 
5549 #undef __FUNCT__
5550 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5551 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5552 {
5553   PetscErrorCode ierr;
5554   PetscInt       m=A->rmap->n,n=B->cmap->n;
5555   Mat            Cmat;
5556 
5557   PetscFunctionBegin;
5558   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5559   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5560   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5561   ierr = MatSetBlockSizes(Cmat,A->rmap->bs,B->cmap->bs);CHKERRQ(ierr);
5562   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5563   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5564   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5565   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5566 
5567   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5568 
5569   *C = Cmat;
5570   PetscFunctionReturn(0);
5571 }
5572 
5573 /* ----------------------------------------------------------------*/
5574 #undef __FUNCT__
5575 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
/* Driver for C = A*B with A MPIDENSE and B MPIAIJ: runs the symbolic stage
   only on the first (MAT_INITIAL_MATRIX) call, the numeric stage on every call. */
PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    /* first call: create and preallocate *C */
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  /* every call: compute the numerical values of *C */
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5591 
5592 #if defined(PETSC_HAVE_MUMPS)
5593 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5594 #endif
5595 #if defined(PETSC_HAVE_PASTIX)
5596 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5597 #endif
5598 #if defined(PETSC_HAVE_SUPERLU_DIST)
5599 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5600 #endif
5601 #if defined(PETSC_HAVE_CLIQUE)
5602 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5603 #endif
5604 
5605 /*MC
5606    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5607 
5608    Options Database Keys:
5609 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5610 
5611   Level: beginner
5612 
5613 .seealso: MatCreateAIJ()
5614 M*/
5615 
5616 #undef __FUNCT__
5617 #define __FUNCT__ "MatCreate_MPIAIJ"
/* Type constructor for MATMPIAIJ: allocates the MPIAIJ data structure, installs
   the function table, and registers type-specific operations and external
   solver hooks.  Preallocation happens later via MatMPIAIJSetPreallocation(). */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;           /* global-to-local column map; built lazily */
  b->garray      = 0;           /* local-to-global map for off-diagonal columns; built at assembly */
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  /* register external direct solver packages available at configure time */
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SUPERLU_DIST)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CLIQUE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
#endif
  /* register type-specific operations looked up by name elsewhere in PETSc */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5684 
5685 #undef __FUNCT__
5686 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5687 /*@
5688      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5689          and "off-diagonal" part of the matrix in CSR format.
5690 
5691    Collective on MPI_Comm
5692 
5693    Input Parameters:
5694 +  comm - MPI communicator
5695 .  m - number of local rows (Cannot be PETSC_DECIDE)
5696 .  n - This value should be the same as the local size used in creating the
5697        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
5698        calculated if N is given) For square matrices n is almost always m.
5699 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
5700 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
5701 .   i - row indices for "diagonal" portion of matrix
5702 .   j - column indices
5703 .   a - matrix values
5704 .   oi - row indices for "off-diagonal" portion of matrix
5705 .   oj - column indices
5706 -   oa - matrix values
5707 
5708    Output Parameter:
5709 .   mat - the matrix
5710 
5711    Level: advanced
5712 
5713    Notes:
5714        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5715        must free the arrays once the matrix has been destroyed and not before.
5716 
5717        The i and j indices are 0 based
5718 
5719        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5720 
5721        This sets local rows and cannot be used to set off-processor values.
5722 
5723        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5724        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5725        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5726        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5727        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5728        communication if it is known that only local entries will be set.
5729 
5730 .keywords: matrix, aij, compressed row, sparse, parallel
5731 
5732 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5733           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5734 @*/
PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  /* the caller's CSR arrays are adopted directly (no copy, no preallocation);
     mark preallocated so the generic preallocation path is skipped */
  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  /* wrap the arrays as the diagonal (A) and off-diagonal (B) blocks.
     NOTE(review): B is created with the full global column count N, so oj
     appears to hold global column indices here -- confirm against the
     column-compaction done during assembly */
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* arrays came from the user: error rather than silently allocating if a new
     nonzero location is introduced later */
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
5767 
5768 /*
5769     Special version for direct calls from Fortran
5770 */
5771 #include <petsc-private/fortranimpl.h>
5772 
5773 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5774 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5775 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5776 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5777 #endif
5778 
5779 /* Change these macros so can be used in void function */
5780 #undef CHKERRQ
5781 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5782 #undef SETERRQ2
5783 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5784 #undef SETERRQ3
5785 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5786 #undef SETERRQ
5787 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5788 
5789 #undef __FUNCT__
5790 #define __FUNCT__ "matsetvaluesmpiaij_"
5791 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5792 {
5793   Mat            mat  = *mmat;
5794   PetscInt       m    = *mm, n = *mn;
5795   InsertMode     addv = *maddv;
5796   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5797   PetscScalar    value;
5798   PetscErrorCode ierr;
5799 
5800   MatCheckPreallocated(mat,1);
5801   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5802 
5803 #if defined(PETSC_USE_DEBUG)
5804   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5805 #endif
5806   {
5807     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5808     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5809     PetscBool roworiented = aij->roworiented;
5810 
5811     /* Some Variables required in the macro */
5812     Mat        A                 = aij->A;
5813     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5814     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5815     MatScalar  *aa               = a->a;
5816     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5817     Mat        B                 = aij->B;
5818     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5819     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5820     MatScalar  *ba               = b->a;
5821 
5822     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5823     PetscInt  nonew = a->nonew;
5824     MatScalar *ap1,*ap2;
5825 
5826     PetscFunctionBegin;
5827     for (i=0; i<m; i++) {
5828       if (im[i] < 0) continue;
5829 #if defined(PETSC_USE_DEBUG)
5830       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5831 #endif
5832       if (im[i] >= rstart && im[i] < rend) {
5833         row      = im[i] - rstart;
5834         lastcol1 = -1;
5835         rp1      = aj + ai[row];
5836         ap1      = aa + ai[row];
5837         rmax1    = aimax[row];
5838         nrow1    = ailen[row];
5839         low1     = 0;
5840         high1    = nrow1;
5841         lastcol2 = -1;
5842         rp2      = bj + bi[row];
5843         ap2      = ba + bi[row];
5844         rmax2    = bimax[row];
5845         nrow2    = bilen[row];
5846         low2     = 0;
5847         high2    = nrow2;
5848 
5849         for (j=0; j<n; j++) {
5850           if (roworiented) value = v[i*n+j];
5851           else value = v[i+j*m];
5852           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5853           if (in[j] >= cstart && in[j] < cend) {
5854             col = in[j] - cstart;
5855             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5856           } else if (in[j] < 0) continue;
5857 #if defined(PETSC_USE_DEBUG)
5858           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5859 #endif
5860           else {
5861             if (mat->was_assembled) {
5862               if (!aij->colmap) {
5863                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5864               }
5865 #if defined(PETSC_USE_CTABLE)
5866               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5867               col--;
5868 #else
5869               col = aij->colmap[in[j]] - 1;
5870 #endif
5871               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5872                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5873                 col  =  in[j];
5874                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5875                 B     = aij->B;
5876                 b     = (Mat_SeqAIJ*)B->data;
5877                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5878                 rp2   = bj + bi[row];
5879                 ap2   = ba + bi[row];
5880                 rmax2 = bimax[row];
5881                 nrow2 = bilen[row];
5882                 low2  = 0;
5883                 high2 = nrow2;
5884                 bm    = aij->B->rmap->n;
5885                 ba    = b->a;
5886               }
5887             } else col = in[j];
5888             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5889           }
5890         }
5891       } else if (!aij->donotstash) {
5892         if (roworiented) {
5893           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5894         } else {
5895           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5896         }
5897       }
5898     }
5899   }
5900   PetscFunctionReturnVoid();
5901 }
5902 
5903