xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision cbfebe2e3d702f86f165d2c287dad1fbd9bf01f6)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
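   Example Usage (a minimal sketch, assuming comm, local sizes m and n, and preallocation
   estimates d_nz and o_nz are already defined; error checking omitted).  Both preallocation
   routines may be called; only the one matching the communicator size takes effect.
.vb
     Mat A;
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
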
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL. The AIJ format also automatically switches over to use inodes when
20    enough exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ,MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
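   Example Usage (a minimal sketch; A is an existing Mat and error checking is omitted):
.vb
     MatSetType(A,MATAIJCRL);
.ve
   or select the type at run time with -mat_type aijcrl followed by a call to MatSetFromOptions(A).
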
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
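  /* first pass: count the locally owned rows whose diagonal (A) and off-diagonal (B) parts contain no
     nonzero values; a second pass below collects the global indices of the rows that are kept */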
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
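  /* once assembled, if the row and column ownership ranges coincide then every diagonal entry lies in
     the local diagonal block A and can be set there directly; otherwise fall back to the default */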
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
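  /* accumulate each local block's contribution into a work array of global column length; the
     contributions are combined across processes by the reduction below */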
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
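/*
   Typical call pattern (a sketch; gmat is the sequential AIJ matrix being distributed and m is the
   number of rows this process is to own):

     Mat Adist;
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&Adist);
     ... later, with the same layout, move over fresh numerical values only ...
     MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&Adist);
*/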
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the number of off-diagonal entries in each row (olens) and of entries left of the diagonal block (ld) */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the number of off-diagonal entries in each row (olens) and of entries left of the diagonal block (ld) */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0 */
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
366 a slightly higher hash table cost; without it it is not scalable (each process
367 has an order N integer array) but access is fast.
368 */
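/*
   Once built, the colmap is consulted as in MatSetValues_MPIAIJ() below (a sketch; gcol is a global
   column index and lcol the corresponding local column of the off-diagonal block, negative if absent):

   #if defined(PETSC_USE_CTABLE)
     PetscTableFind(aij->colmap,gcol+1,&lcol); lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
*/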
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
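/*
   The two macros below insert (row,col,value) into the diagonal (A) and off-diagonal (B) sequential
   blocks respectively: a bounded binary search locates the column within the sorted row, an existing
   entry is added to or overwritten, and otherwise the row is grown (subject to the nonew setting) with
   later entries shifted up so the row stays sorted.
*/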
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled; if so we must
707      also disassemble ourselves, in order that we may reassemble. */
708   /*
709      if the nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled, thus we can skip this step
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
730   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
731 
732   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
733   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
734     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
735     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
742 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
743 {
744   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
749   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
750   PetscFunctionReturn(0);
751 }
752 
753 #undef __FUNCT__
754 #define __FUNCT__ "MatZeroRows_MPIAIJ"
755 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
756 {
757   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
758   PetscInt      *owners = A->rmap->range;
759   PetscInt       n      = A->rmap->n;
760   PetscSF        sf;
761   PetscInt      *lrows;
762   PetscSFNode   *rrows;
763   PetscInt       r, p = 0, len = 0;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   /* Create SF where leaves are input rows and roots are owned rows */
768   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
769   for (r = 0; r < n; ++r) lrows[r] = -1;
770   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
771   for (r = 0; r < N; ++r) {
772     const PetscInt idx   = rows[r];
773     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
774     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
775       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
776     }
777     if (A->nooffproczerorows) {
778       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
779       lrows[len++] = idx - owners[p];
780     } else {
781       rrows[r].rank = p;
782       rrows[r].index = rows[r] - owners[p];
783     }
784   }
785   if (!A->nooffproczerorows) {
786     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
787     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
788     /* Collect flags for rows to be zeroed */
789     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
790     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
791     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
792     /* Compress and put in row numbers */
793     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
794   }
795   /* fix right hand side if needed */
796   if (x && b) {
797     const PetscScalar *xx;
798     PetscScalar       *bb;
799 
800     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
801     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
802     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
803     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
804     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
805   }
806   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
807   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
808   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
809     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
810   } else if (diag != 0.0) {
811     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
813     for (r = 0; r < len; ++r) {
814       const PetscInt row = lrows[r] + A->rmap->rstart;
815       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
816     }
817     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
818     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
819   } else {
820     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
821   }
822   ierr = PetscFree(lrows);CHKERRQ(ierr);
823 
824   /* only change matrix nonzero state if pattern was allowed to be changed */
825   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
826     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
827     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
828   }
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
834 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
835 {
836   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
837   PetscErrorCode    ierr;
838   PetscMPIInt       n = A->rmap->n;
839   PetscInt          i,j,r,m,p = 0,len = 0;
840   PetscInt          *lrows,*owners = A->rmap->range;
841   PetscSFNode       *rrows;
842   PetscSF           sf;
843   const PetscScalar *xx;
844   PetscScalar       *bb,*mask;
845   Vec               xmask,lmask;
846   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
847   const PetscInt    *aj, *ii,*ridx;
848   PetscScalar       *aa;
849 
850   PetscFunctionBegin;
851   /* Create SF where leaves are input rows and roots are owned rows */
852   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
853   for (r = 0; r < n; ++r) lrows[r] = -1;
854   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
855   for (r = 0; r < N; ++r) {
856     const PetscInt idx   = rows[r];
857     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
858     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
859       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
860     }
861     rrows[r].rank  = p;
862     rrows[r].index = rows[r] - owners[p];
863   }
864   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
865   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
866   /* Collect flags for rows to be zeroed */
867   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
868   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
869   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
870   /* Compress and put in row numbers */
871   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
872   /* zero diagonal part of matrix */
873   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
874   /* handle off diagonal part of matrix */
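  /* build a mask vector with a 1 in each zeroed row and scatter it forward so every process knows
     which of its ghost columns correspond to eliminated rows */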
875   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
876   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
877   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
878   for (i=0; i<len; i++) bb[lrows[i]] = 1;
879   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
880   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
883   if (x) {
884     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
887     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
888   }
889   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
890   /* remove zeroed rows of off diagonal matrix */
891   ii = aij->i;
892   for (i=0; i<len; i++) {
893     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
894   }
895   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
896   if (aij->compressedrow.use) {
897     m    = aij->compressedrow.nrows;
898     ii   = aij->compressedrow.i;
899     ridx = aij->compressedrow.rindex;
900     for (i=0; i<m; i++) {
901       n  = ii[i+1] - ii[i];
902       aj = aij->j + ii[i];
903       aa = aij->a + ii[i];
904 
905       for (j=0; j<n; j++) {
906         if (PetscAbsScalar(mask[*aj])) {
907           if (b) bb[*ridx] -= *aa*xx[*aj];
908           *aa = 0.0;
909         }
910         aa++;
911         aj++;
912       }
913       ridx++;
914     }
915   } else { /* do not use compressed row format */
916     m = l->B->rmap->n;
917     for (i=0; i<m; i++) {
918       n  = ii[i+1] - ii[i];
919       aj = aij->j + ii[i];
920       aa = aij->a + ii[i];
921       for (j=0; j<n; j++) {
922         if (PetscAbsScalar(mask[*aj])) {
923           if (b) bb[i] -= *aa*xx[*aj];
924           *aa = 0.0;
925         }
926         aa++;
927         aj++;
928       }
929     }
930   }
931   if (x) {
932     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
933     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
934   }
935   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
936   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
937   ierr = PetscFree(lrows);CHKERRQ(ierr);
938 
939   /* only change matrix nonzero state if pattern was allowed to be changed */
940   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
941     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
942     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
943   }
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMult_MPIAIJ"
949 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
950 {
951   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       nt;
954 
955   PetscFunctionBegin;
956   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
957   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
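  /* overlap the scatter of required off-process entries of xx into a->lvec with the product against
     the local diagonal block */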
958   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
959   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
960   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
961   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
967 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
968 {
969   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatMultAdd_MPIAIJ"
979 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
986   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
988   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
994 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998   PetscBool      merged;
999 
1000   PetscFunctionBegin;
1001   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   if (!merged) {
1005     /* send it on its way */
1006     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1007     /* do local part */
1008     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1009     /* receive remote parts: note this assumes the values are not actually */
1010     /* added in yy until the next line */
1011     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1012   } else {
1013     /* do local part */
1014     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1015     /* send it on its way */
1016     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1017     /* values actually were received in the Begin() but we still need to call this no-op */
1018     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1019   }
1020   PetscFunctionReturn(0);
1021 }
1022 
1023 #undef __FUNCT__
1024 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1025 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1026 {
1027   MPI_Comm       comm;
1028   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1029   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1030   IS             Me,Notme;
1031   PetscErrorCode ierr;
1032   PetscInt       M,N,first,last,*notme,i;
1033   PetscMPIInt    size;
1034 
1035   PetscFunctionBegin;
1036   /* Easy test: the diagonal blocks must be transposes of each other */
1037   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1038   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1039   if (!*f) PetscFunctionReturn(0);
1040   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1041   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1042   if (size == 1) PetscFunctionReturn(0);
1043 
1044   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
1045   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1046   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1047   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1048   for (i=0; i<first; i++) notme[i] = i;
1049   for (i=last; i<M; i++) notme[i-last+first] = i;
1050   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1051   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1052   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1053   Aoff = Aoffs[0];
1054   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1055   Boff = Boffs[0];
1056   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1057   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1058   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1059   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1060   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1061   ierr = PetscFree(notme);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1067 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1070   PetscErrorCode ierr;
1071 
1072   PetscFunctionBegin;
1073   /* do nondiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   /* send it on its way */
1076   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   /* do local part */
1078   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1079   /* receive remote parts */
1080   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 /*
1085   This only works correctly for square matrices where the subblock A->A is the
1086    diagonal block
1087 */
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1090 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1091 {
1092   PetscErrorCode ierr;
1093   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1094 
1095   PetscFunctionBegin;
1096   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1097   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1098   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatScale_MPIAIJ"
1104 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1105 {
1106   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1111   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatDestroy_Redundant"
1117 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1118 {
1119   PetscErrorCode ierr;
1120   Mat_Redundant  *redund = *redundant;
1121   PetscInt       i;
1122 
1123   PetscFunctionBegin;
1124   *redundant = NULL;
1125   if (redund){
1126     if (redund->matseq) { /* via MatGetSubMatrices()  */
1127       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1128       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1129       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1130       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1131     } else {
1132       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1133       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1135       for (i=0; i<redund->nrecvs; i++) {
1136         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1137         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1138       }
1139       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1140     }
1141 
1142     if (redund->psubcomm) {
1143       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1144     }
1145     ierr = PetscFree(redund);CHKERRQ(ierr);
1146   }
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 #undef __FUNCT__
1151 #define __FUNCT__ "MatDestroy_MPIAIJ"
1152 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1153 {
1154   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1155   PetscErrorCode ierr;
1156 
1157   PetscFunctionBegin;
1158 #if defined(PETSC_USE_LOG)
1159   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1160 #endif
1161   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1162   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1163   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1164   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1165   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1166 #if defined(PETSC_USE_CTABLE)
1167   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1168 #else
1169   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1170 #endif
1171   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1172   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1173   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1174   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1175   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1176   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1177 
1178   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1179   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1180   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1181   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1182   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1187 #if defined(PETSC_HAVE_ELEMENTAL)
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1189 #endif
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1195 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1196 {
1197   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1198   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1199   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1200   PetscErrorCode ierr;
1201   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1202   int            fd;
1203   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1204   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1205   PetscScalar    *column_values;
1206   PetscInt       message_count,flowcontrolcount;
1207   FILE           *file;
1208 
1209   PetscFunctionBegin;
1210   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1211   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1212   nz   = A->nz + B->nz;
1213   if (!rank) {
1214     header[0] = MAT_FILE_CLASSID;
1215     header[1] = mat->rmap->N;
1216     header[2] = mat->cmap->N;
1217 
1218     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1220     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1221     /* get largest number of rows any processor has */
1222     rlen  = mat->rmap->n;
1223     range = mat->rmap->range;
1224     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1225   } else {
1226     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     rlen = mat->rmap->n;
1228   }
1229 
1230   /* load up the local row counts */
1231   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1232   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1233 
1234   /* store the row lengths to the file */
1235   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1236   if (!rank) {
1237     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1238     for (i=1; i<size; i++) {
1239       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1240       rlen = range[i+1] - range[i];
1241       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1242       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1243     }
1244     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1245   } else {
1246     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1247     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1251 
1252   /* load up the local column indices */
1253   nzmax = nz; /* each processor needs as much space as the largest processor needs */
1254   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1255   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1256   cnt   = 0;
1257   for (i=0; i<mat->rmap->n; i++) {
1258     for (j=B->i[i]; j<B->i[i+1]; j++) {
1259       if ((col = garray[B->j[j]]) > cstart) break;
1260       column_indices[cnt++] = col;
1261     }
1262     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1263     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1264   }
1265   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1266 
1267   /* store the column indices to the file */
1268   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1269   if (!rank) {
1270     MPI_Status status;
1271     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1272     for (i=1; i<size; i++) {
1273       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1274       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1275       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1276       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1277       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1278     }
1279     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1280   } else {
1281     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1282     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1283     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1284     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1285   }
1286   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1287 
1288   /* load up the local column values */
1289   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1290   cnt  = 0;
1291   for (i=0; i<mat->rmap->n; i++) {
1292     for (j=B->i[i]; j<B->i[i+1]; j++) {
1293       if (garray[B->j[j]] > cstart) break;
1294       column_values[cnt++] = B->a[j];
1295     }
1296     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1297     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1298   }
1299   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1300 
1301   /* store the column values to the file */
1302   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1303   if (!rank) {
1304     MPI_Status status;
1305     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1306     for (i=1; i<size; i++) {
1307       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1308       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1309       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1310       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1311       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1312     }
1313     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1314   } else {
1315     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1316     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1317     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1319   }
1320   ierr = PetscFree(column_values);CHKERRQ(ierr);
1321 
1322   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1323   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1324   PetscFunctionReturn(0);
1325 }
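/*
   A minimal serial reader sketch matching the write order of MatView_MPIAIJ_Binary() above
   (illustration only; MatLoad() is the supported way to read such files; the variable names
   are hypothetical):

     int         fd;
     PetscInt    header[4],*rowlens,*cols;
     PetscScalar *vals;
     ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);            // {MAT_FILE_CLASSID, M, N, total nz}
     ierr = PetscMalloc1(header[1],&rowlens);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,rowlens,header[1],PETSC_INT);CHKERRQ(ierr);   // all M row lengths, rank 0's rows first
     ierr = PetscMalloc1(header[3],&cols);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,cols,header[3],PETSC_INT);CHKERRQ(ierr);      // global column indices, row by row
     ierr = PetscMalloc1(header[3],&vals);CHKERRQ(ierr);
     ierr = PetscBinaryRead(fd,vals,header[3],PETSC_SCALAR);CHKERRQ(ierr);   // values, row by row
*/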
1326 
1327 #include <petscdraw.h>
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1330 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1331 {
1332   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1333   PetscErrorCode    ierr;
1334   PetscMPIInt       rank = aij->rank,size = aij->size;
1335   PetscBool         isdraw,iascii,isbinary;
1336   PetscViewer       sviewer;
1337   PetscViewerFormat format;
1338 
1339   PetscFunctionBegin;
1340   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1342   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1343   if (iascii) {
1344     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1345     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1346       MatInfo   info;
1347       PetscBool inodes;
1348 
1349       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1350       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1351       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1352       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1353       if (!inodes) {
1354         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1355                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1356       } else {
1357         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1358                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1359       }
1360       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1361       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1362       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1363       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1364       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1365       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1366       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1367       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1368       PetscFunctionReturn(0);
1369     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1370       PetscInt inodecount,inodelimit,*inodes;
1371       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1372       if (inodes) {
1373         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1374       } else {
1375         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1376       }
1377       PetscFunctionReturn(0);
1378     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1379       PetscFunctionReturn(0);
1380     }
1381   } else if (isbinary) {
1382     if (size == 1) {
1383       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1384       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1385     } else {
1386       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1387     }
1388     PetscFunctionReturn(0);
1389   } else if (isdraw) {
1390     PetscDraw draw;
1391     PetscBool isnull;
1392     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1393     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1394   }
1395 
1396   {
1397     /* assemble the entire matrix onto the first processor */
1398     Mat        A;
1399     Mat_SeqAIJ *Aloc;
1400     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1401     MatScalar  *a;
1402 
1403     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1404     if (!rank) {
1405       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1406     } else {
1407       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1408     }
1409     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1410     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1411     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1412     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1413     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1414 
1415     /* copy over the A part */
1416     Aloc = (Mat_SeqAIJ*)aij->A->data;
1417     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1418     row  = mat->rmap->rstart;
1419     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1420     for (i=0; i<m; i++) {
1421       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1422       row++;
1423       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1424     }
1425     aj = Aloc->j;
1426     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1427 
1428     /* copy over the B part */
1429     Aloc = (Mat_SeqAIJ*)aij->B->data;
1430     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1431     row  = mat->rmap->rstart;
1432     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1433     ct   = cols;
1434     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1435     for (i=0; i<m; i++) {
1436       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1437       row++;
1438       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1439     }
1440     ierr = PetscFree(ct);CHKERRQ(ierr);
1441     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1442     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1443     /*
1444        Everyone has to call this to draw the matrix since the graphics waits are
1445        synchronized across all processors that share the PetscDraw object
1446     */
1447     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1448     if (!rank) {
1449       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1450     }
1451     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1452     ierr = MatDestroy(&A);CHKERRQ(ierr);
1453   }
1454   PetscFunctionReturn(0);
1455 }
1456 
1457 #undef __FUNCT__
1458 #define __FUNCT__ "MatView_MPIAIJ"
1459 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1460 {
1461   PetscErrorCode ierr;
1462   PetscBool      iascii,isdraw,issocket,isbinary;
1463 
1464   PetscFunctionBegin;
1465   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1469   if (iascii || isdraw || isbinary || issocket) {
1470     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1471   }
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatSOR_MPIAIJ"
1477 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1478 {
1479   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1480   PetscErrorCode ierr;
1481   Vec            bb1 = 0;
1482   PetscBool      hasop;
1483 
1484   PetscFunctionBegin;
1485   if (flag == SOR_APPLY_UPPER) {
1486     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1487     PetscFunctionReturn(0);
1488   }
1489 
1490   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1492   }
1493 
1494   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1495     if (flag & SOR_ZERO_INITIAL_GUESS) {
1496       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1497       its--;
1498     }
1499 
1500     while (its--) {
1501       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1502       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503 
1504       /* update rhs: bb1 = bb - B*x */
1505       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1507 
1508       /* local sweep */
1509       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1510     }
1511   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1512     if (flag & SOR_ZERO_INITIAL_GUESS) {
1513       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1514       its--;
1515     }
1516     while (its--) {
1517       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519 
1520       /* update rhs: bb1 = bb - B*x */
1521       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1522       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1523 
1524       /* local sweep */
1525       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1526     }
1527   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1528     if (flag & SOR_ZERO_INITIAL_GUESS) {
1529       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1530       its--;
1531     }
1532     while (its--) {
1533       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1534       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535 
1536       /* update rhs: bb1 = bb - B*x */
1537       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1538       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1539 
1540       /* local sweep */
1541       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1542     }
1543   } else if (flag & SOR_EISENSTAT) {
1544     Vec xx1;
1545 
1546     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1547     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1548 
1549     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1550     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     if (!mat->diag) {
1552       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1553       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1554     }
1555     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1556     if (hasop) {
1557       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1558     } else {
1559       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1560     }
1561     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1562 
1563     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1564 
1565     /* local sweep */
1566     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1567     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1568     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1569   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1570 
1571   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1572   PetscFunctionReturn(0);
1573 }
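/*
   Sketch of the iteration implemented by MatSOR_MPIAIJ() above (a restatement of the
   splitting used in the code, not additional functionality): with local diagonal block A,
   local off-diagonal block B and ghost values x_off gathered through Mvctx, each outer
   iteration performs

       bb1 = bb - B * x_off                     (fold off-process data into the right-hand side)
       x   = SOR(A, bb1, omega, fshift, lits)   (lits sweeps on the local diagonal block)

   i.e. a block Jacobi step across processes with SOR applied within each diagonal block.
*/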
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatPermute_MPIAIJ"
1577 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1578 {
1579   Mat            aA,aB,Aperm;
1580   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1581   PetscScalar    *aa,*ba;
1582   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1583   PetscSF        rowsf,sf;
1584   IS             parcolp = NULL;
1585   PetscBool      done;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1590   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1591   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1592   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1593 
1594   /* Invert row permutation to find out where my rows should go */
1595   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1596   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1597   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1598   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1599   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1600   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
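  /* Worked example (hypothetical data, for illustration only): on a single process with
     m = 3 and rwant = {2,0,1} (new row i comes from old row rwant[i]), the leaves hold
     work = {0,1,2} and the reduction deposits each value at root rwant[i], giving
     rdest = {1,2,0}; rdest[k] is thus the new global row index of old local row k. */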
1601 
1602   /* Invert column permutation to find out where my columns should go */
1603   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1604   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1605   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1606   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1607   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1608   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1610 
1611   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1612   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1613   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1614 
1615   /* Find out where my gcols should go */
1616   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1617   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1618   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1619   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1620   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1621   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1622   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1624 
1625   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1626   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1628   for (i=0; i<m; i++) {
1629     PetscInt row = rdest[i],rowner;
1630     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       PetscInt cowner,col = cdest[aj[j]];
1633       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1634       if (rowner == cowner) dnnz[i]++;
1635       else onnz[i]++;
1636     }
1637     for (j=bi[i]; j<bi[i+1]; j++) {
1638       PetscInt cowner,col = gcdest[bj[j]];
1639       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1640       if (rowner == cowner) dnnz[i]++;
1641       else onnz[i]++;
1642     }
1643   }
1644   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1645   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1649 
1650   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1651   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1655     PetscInt j0,rowlen;
1656     rowlen = ai[i+1] - ai[i];
1657     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than the number of rows m, so insert the values in batches of at most m */
1658       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1659       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1660     }
1661     rowlen = bi[i+1] - bi[i];
1662     for (j0=j=0; j<rowlen; j0=j) {
1663       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1664       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666   }
1667   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1671   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1673   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1674   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1675   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1676   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1677   *B = Aperm;
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1683 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1684 {
1685   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1686   Mat            A    = mat->A,B = mat->B;
1687   PetscErrorCode ierr;
1688   PetscReal      isend[5],irecv[5];
1689 
1690   PetscFunctionBegin;
1691   info->block_size = 1.0;
1692   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1693 
1694   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1695   isend[3] = info->memory;  isend[4] = info->mallocs;
1696 
1697   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1698 
1699   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1700   isend[3] += info->memory;  isend[4] += info->mallocs;
1701   if (flag == MAT_LOCAL) {
1702     info->nz_used      = isend[0];
1703     info->nz_allocated = isend[1];
1704     info->nz_unneeded  = isend[2];
1705     info->memory       = isend[3];
1706     info->mallocs      = isend[4];
1707   } else if (flag == MAT_GLOBAL_MAX) {
1708     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1709 
1710     info->nz_used      = irecv[0];
1711     info->nz_allocated = irecv[1];
1712     info->nz_unneeded  = irecv[2];
1713     info->memory       = irecv[3];
1714     info->mallocs      = irecv[4];
1715   } else if (flag == MAT_GLOBAL_SUM) {
1716     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1717 
1718     info->nz_used      = irecv[0];
1719     info->nz_allocated = irecv[1];
1720     info->nz_unneeded  = irecv[2];
1721     info->memory       = irecv[3];
1722     info->mallocs      = irecv[4];
1723   }
1724   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1725   info->fill_ratio_needed = 0;
1726   info->factor_mallocs    = 0;
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatSetOption_MPIAIJ"
1732 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1733 {
1734   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1735   PetscErrorCode ierr;
1736 
1737   PetscFunctionBegin;
1738   switch (op) {
1739   case MAT_NEW_NONZERO_LOCATIONS:
1740   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1741   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1742   case MAT_KEEP_NONZERO_PATTERN:
1743   case MAT_NEW_NONZERO_LOCATION_ERR:
1744   case MAT_USE_INODES:
1745   case MAT_IGNORE_ZERO_ENTRIES:
1746     MatCheckPreallocated(A,1);
1747     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1748     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1749     break;
1750   case MAT_ROW_ORIENTED:
1751     a->roworiented = flg;
1752 
1753     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1754     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1755     break;
1756   case MAT_NEW_DIAGONALS:
1757     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1758     break;
1759   case MAT_IGNORE_OFF_PROC_ENTRIES:
1760     a->donotstash = flg;
1761     break;
1762   case MAT_SPD:
1763     A->spd_set = PETSC_TRUE;
1764     A->spd     = flg;
1765     if (flg) {
1766       A->symmetric                  = PETSC_TRUE;
1767       A->structurally_symmetric     = PETSC_TRUE;
1768       A->symmetric_set              = PETSC_TRUE;
1769       A->structurally_symmetric_set = PETSC_TRUE;
1770     }
1771     break;
1772   case MAT_SYMMETRIC:
1773     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1774     break;
1775   case MAT_STRUCTURALLY_SYMMETRIC:
1776     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1777     break;
1778   case MAT_HERMITIAN:
1779     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1780     break;
1781   case MAT_SYMMETRY_ETERNAL:
1782     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1783     break;
1784   default:
1785     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatGetRow_MPIAIJ"
1792 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1793 {
1794   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1795   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1796   PetscErrorCode ierr;
1797   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1798   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1799   PetscInt       *cmap,*idx_p;
1800 
1801   PetscFunctionBegin;
1802   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1803   mat->getrowactive = PETSC_TRUE;
1804 
1805   if (!mat->rowvalues && (idx || v)) {
1806     /*
1807         allocate enough space to hold information from the longest row.
1808     */
1809     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1810     PetscInt   max = 1,tmp;
1811     for (i=0; i<matin->rmap->n; i++) {
1812       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1813       if (max < tmp) max = tmp;
1814     }
1815     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1816   }
1817 
1818   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1819   lrow = row - rstart;
1820 
1821   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1822   if (!v)   {pvA = 0; pvB = 0;}
1823   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1824   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1825   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1826   nztot = nzA + nzB;
1827 
1828   cmap = mat->garray;
1829   if (v  || idx) {
1830     if (nztot) {
1831       /* Sort by increasing column numbers, assuming A and B already sorted */
1832       PetscInt imark = -1;
1833       if (v) {
1834         *v = v_p = mat->rowvalues;
1835         for (i=0; i<nzB; i++) {
1836           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1837           else break;
1838         }
1839         imark = i;
1840         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1841         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1842       }
1843       if (idx) {
1844         *idx = idx_p = mat->rowindices;
1845         if (imark > -1) {
1846           for (i=0; i<imark; i++) {
1847             idx_p[i] = cmap[cworkB[i]];
1848           }
1849         } else {
1850           for (i=0; i<nzB; i++) {
1851             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1852             else break;
1853           }
1854           imark = i;
1855         }
1856         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1857         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1858       }
1859     } else {
1860       if (idx) *idx = 0;
1861       if (v)   *v   = 0;
1862     }
1863   }
1864   *nz  = nztot;
1865   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1866   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
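/*
   Minimal caller sketch for the routine above (illustration only, through the public
   MatGetRow()/MatRestoreRow() interface): the returned column indices are global and in
   increasing order, merged from the off-diagonal columns left of the diagonal block, the
   diagonal block, and the off-diagonal columns to its right.

     PetscInt          ncols,k;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);     // row must be locally owned
     for (k=0; k<ncols; k++) { ... cols[k] is a global column index, vals[k] its value ... }
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/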
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1872 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1873 {
1874   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1875 
1876   PetscFunctionBegin;
1877   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1878   aij->getrowactive = PETSC_FALSE;
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatNorm_MPIAIJ"
1884 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1885 {
1886   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1887   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1888   PetscErrorCode ierr;
1889   PetscInt       i,j,cstart = mat->cmap->rstart;
1890   PetscReal      sum = 0.0;
1891   MatScalar      *v;
1892 
1893   PetscFunctionBegin;
1894   if (aij->size == 1) {
1895     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1896   } else {
1897     if (type == NORM_FROBENIUS) {
1898       v = amat->a;
1899       for (i=0; i<amat->nz; i++) {
1900         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1901       }
1902       v = bmat->a;
1903       for (i=0; i<bmat->nz; i++) {
1904         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1905       }
1906       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1907       *norm = PetscSqrtReal(*norm);
1908     } else if (type == NORM_1) { /* max column norm */
1909       PetscReal *tmp,*tmp2;
1910       PetscInt  *jj,*garray = aij->garray;
1911       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1912       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1913       *norm = 0.0;
1914       v     = amat->a; jj = amat->j;
1915       for (j=0; j<amat->nz; j++) {
1916         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1917       }
1918       v = bmat->a; jj = bmat->j;
1919       for (j=0; j<bmat->nz; j++) {
1920         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1921       }
1922       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1923       for (j=0; j<mat->cmap->N; j++) {
1924         if (tmp2[j] > *norm) *norm = tmp2[j];
1925       }
1926       ierr = PetscFree(tmp);CHKERRQ(ierr);
1927       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1928     } else if (type == NORM_INFINITY) { /* max row norm */
1929       PetscReal ntemp = 0.0;
1930       for (j=0; j<aij->A->rmap->n; j++) {
1931         v   = amat->a + amat->i[j];
1932         sum = 0.0;
1933         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1934           sum += PetscAbsScalar(*v); v++;
1935         }
1936         v = bmat->a + bmat->i[j];
1937         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1938           sum += PetscAbsScalar(*v); v++;
1939         }
1940         if (sum > ntemp) ntemp = sum;
1941       }
1942       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1943     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1944   }
1945   PetscFunctionReturn(0);
1946 }
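/*
   The three norms supported above, written out (A here denotes the global matrix, not the
   diagonal block):

     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_{i,j} |a_{ij}|^2 )   (local sums, then MPI sum)
     NORM_1:          ||A||_1   = max_j sum_i |a_{ij}|           (maximum column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_{ij}|           (maximum row sum)
*/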
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatTranspose_MPIAIJ"
1950 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1951 {
1952   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1953   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1954   PetscErrorCode ierr;
1955   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1956   PetscInt       cstart = A->cmap->rstart,ncol;
1957   Mat            B;
1958   MatScalar      *array;
1959 
1960   PetscFunctionBegin;
1961   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1962 
1963   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1964   ai = Aloc->i; aj = Aloc->j;
1965   bi = Bloc->i; bj = Bloc->j;
1966   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1967     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1968     PetscSFNode          *oloc;
1969     PETSC_UNUSED PetscSF sf;
1970 
1971     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1972     /* compute d_nnz for preallocation */
1973     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1974     for (i=0; i<ai[ma]; i++) {
1975       d_nnz[aj[i]]++;
1976       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1977     }
1978     /* compute local off-diagonal contributions */
1979     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1980     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1981     /* map those to global */
1982     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1983     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1984     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1985     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1986     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1987     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1988     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
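    /* Worked example (illustration only): if local column c of the diagonal block A holds
       3 stored entries, then row cstart+c of the transpose gets d_nnz[c] = 3 diagonal-block
       entries; entries of the off-diagonal block B are counted per compressed column in
       g_nnz and summed onto the owning process's o_nnz through the star forest above. */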
1989 
1990     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1991     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1992     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1993     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1994     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1995     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1996   } else {
1997     B    = *matout;
1998     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1999     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2000   }
2001 
2002   /* copy over the A part */
2003   array = Aloc->a;
2004   row   = A->rmap->rstart;
2005   for (i=0; i<ma; i++) {
2006     ncol = ai[i+1]-ai[i];
2007     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2008     row++;
2009     array += ncol; aj += ncol;
2010   }
2011   aj = Aloc->j;
2012   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local col indices */
2013 
2014   /* copy over the B part */
2015   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2016   array = Bloc->a;
2017   row   = A->rmap->rstart;
2018   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2019   cols_tmp = cols;
2020   for (i=0; i<mb; i++) {
2021     ncol = bi[i+1]-bi[i];
2022     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2023     row++;
2024     array += ncol; cols_tmp += ncol;
2025   }
2026   ierr = PetscFree(cols);CHKERRQ(ierr);
2027 
2028   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2029   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2031     *matout = B;
2032   } else {
2033     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2040 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2041 {
2042   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2043   Mat            a    = aij->A,b = aij->B;
2044   PetscErrorCode ierr;
2045   PetscInt       s1,s2,s3;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2049   if (rr) {
2050     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2051     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2052     /* Overlap communication with computation. */
2053     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2054   }
2055   if (ll) {
2056     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2057     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2058     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2059   }
2060   /* scale  the diagonal block */
2061   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2062 
2063   if (rr) {
2064     /* Do a scatter end and then right scale the off-diagonal block */
2065     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2066     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2073 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2074 {
2075   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2076   PetscErrorCode ierr;
2077 
2078   PetscFunctionBegin;
2079   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatEqual_MPIAIJ"
2085 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2086 {
2087   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2088   Mat            a,b,c,d;
2089   PetscBool      flg;
2090   PetscErrorCode ierr;
2091 
2092   PetscFunctionBegin;
2093   a = matA->A; b = matA->B;
2094   c = matB->A; d = matB->B;
2095 
2096   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2097   if (flg) {
2098     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2099   }
2100   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2101   PetscFunctionReturn(0);
2102 }
2103 
2104 #undef __FUNCT__
2105 #define __FUNCT__ "MatCopy_MPIAIJ"
2106 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2107 {
2108   PetscErrorCode ierr;
2109   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2110   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2111 
2112   PetscFunctionBegin;
2113   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2114   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2115     /* because of the column compression in the off-processor part of the matrix a->B,
2116        the number of columns in a->B and b->B may be different, hence we cannot call
2117        MatCopy() directly on the two parts. If need be, a copy more efficient than
2118        MatCopy_Basic() could be provided by first uncompressing the a->B matrices and
2119        then copying the submatrices */
2120     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2121   } else {
2122     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2123     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2124   }
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #undef __FUNCT__
2129 #define __FUNCT__ "MatSetUp_MPIAIJ"
2130 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2131 {
2132   PetscErrorCode ierr;
2133 
2134   PetscFunctionBegin;
2135   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 /*
2140    Computes the number of nonzeros per row needed for preallocation when X and Y
2141    have different nonzero structure.
2142 */
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2145 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2146 {
2147   PetscInt       i,j,k,nzx,nzy;
2148 
2149   PetscFunctionBegin;
2150   /* Set the number of nonzeros in the new matrix */
2151   for (i=0; i<m; i++) {
2152     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2153     nzx = xi[i+1] - xi[i];
2154     nzy = yi[i+1] - yi[i];
2155     nnz[i] = 0;
2156     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2157       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2158       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2159       nnz[i]++;
2160     }
2161     for (; k<nzy; k++) nnz[i]++;
2162   }
2163   PetscFunctionReturn(0);
2164 }
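/*
   Worked example for the merge count above (hypothetical rows, illustration only): if a row
   of X has global columns {1,4,7} and the matching row of Y has global columns {2,4,9}, the
   two sorted sequences merge to the union {1,2,4,7,9}, so nnz[i] = 5; the shared column 4 is
   counted once because the duplicate is skipped.
*/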
2165 
2166 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2169 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2170 {
2171   PetscErrorCode ierr;
2172   PetscInt       m = Y->rmap->N;
2173   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2174   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2175 
2176   PetscFunctionBegin;
2177   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatAXPY_MPIAIJ"
2183 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2184 {
2185   PetscErrorCode ierr;
2186   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2187   PetscBLASInt   bnz,one=1;
2188   Mat_SeqAIJ     *x,*y;
2189 
2190   PetscFunctionBegin;
2191   if (str == SAME_NONZERO_PATTERN) {
2192     PetscScalar alpha = a;
2193     x    = (Mat_SeqAIJ*)xx->A->data;
2194     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2195     y    = (Mat_SeqAIJ*)yy->A->data;
2196     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2197     x    = (Mat_SeqAIJ*)xx->B->data;
2198     y    = (Mat_SeqAIJ*)yy->B->data;
2199     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2200     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2201     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2202   } else {
2203     Mat      B;
2204     PetscInt *nnz_d,*nnz_o;
2205     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2206     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2207     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2208     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2209     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2210     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2211     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2212     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2213     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2214     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2215     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2216     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2217     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2218     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2219   }
2220   PetscFunctionReturn(0);
2221 }
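/*
   In the SAME_NONZERO_PATTERN branch above the update reduces to two dense AXPY calls on the
   stored value arrays, i.e. (sketch of the arithmetic only)

       y->a[k] = a * x->a[k] + y->a[k]   for every stored entry k of the diagonal and
                                         off-diagonal blocks,

   which is valid only because X and Y then share identical i/j index structure.
*/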
2222 
2223 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatConjugate_MPIAIJ"
2227 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2228 {
2229 #if defined(PETSC_USE_COMPLEX)
2230   PetscErrorCode ierr;
2231   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2232 
2233   PetscFunctionBegin;
2234   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2235   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2236 #else
2237   PetscFunctionBegin;
2238 #endif
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatRealPart_MPIAIJ"
2244 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2245 {
2246   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2247   PetscErrorCode ierr;
2248 
2249   PetscFunctionBegin;
2250   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2251   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2257 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2258 {
2259   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2260   PetscErrorCode ierr;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2264   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #if defined(PETSC_HAVE_PBGL)
2269 
2270 #include <boost/parallel/mpi/bsp_process_group.hpp>
2271 #include <boost/graph/distributed/ilu_default_graph.hpp>
2272 #include <boost/graph/distributed/ilu_0_block.hpp>
2273 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2274 #include <boost/graph/distributed/petsc/interface.hpp>
2275 #include <boost/multi_array.hpp>
2276 #include <boost/parallel/distributed_property_map.hpp>
2277 
2278 #undef __FUNCT__
2279 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2280 /*
2281   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2282 */
2283 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2284 {
2285   namespace petsc = boost::distributed::petsc;
2286 
2287   namespace graph_dist = boost::graph::distributed;
2288   using boost::graph::distributed::ilu_default::process_group_type;
2289   using boost::graph::ilu_permuted;
2290 
2291   PetscBool      row_identity, col_identity;
2292   PetscContainer c;
2293   PetscInt       m, n, M, N;
2294   PetscErrorCode ierr;
2295 
2296   PetscFunctionBegin;
2297   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2298   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2299   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2300   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2301 
2302   process_group_type pg;
2303   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2304   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2305   lgraph_type& level_graph = *lgraph_p;
2306   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2307 
2308   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2309   ilu_permuted(level_graph);
2310 
2311   /* put together the new matrix */
2312   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2313   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2314   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2315   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2316   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2317   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2318   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2319   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2320 
2321   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2322   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2323   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2324   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2330 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2331 {
2332   PetscFunctionBegin;
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatSolve_MPIAIJ"
2338 /*
2339   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2340 */
2341 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2342 {
2343   namespace graph_dist = boost::graph::distributed;
2344 
2345   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2346   lgraph_type    *lgraph_p;
2347   PetscContainer c;
2348   PetscErrorCode ierr;
2349 
2350   PetscFunctionBegin;
2351   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2352   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2353   ierr = VecCopy(b, x);CHKERRQ(ierr);
2354 
2355   PetscScalar *array_x;
2356   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2357   PetscInt sx;
2358   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2359 
2360   PetscScalar *array_b;
2361   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2362   PetscInt sb;
2363   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2364 
2365   lgraph_type& level_graph = *lgraph_p;
2366   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2367 
2368   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2369   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2370   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2371 
2372   typedef boost::iterator_property_map<array_ref_type::iterator,
2373                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2374   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2375   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2376 
2377   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2378   PetscFunctionReturn(0);
2379 }
2380 #endif
2381 
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2385 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2386 {
2387   PetscMPIInt    rank,size;
2388   MPI_Comm       comm;
2389   PetscErrorCode ierr;
2390   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2391   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2392   PetscInt       *rowrange = mat->rmap->range;
2393   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2394   Mat            A = aij->A,B=aij->B,C=*matredundant;
2395   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2396   PetscScalar    *sbuf_a;
2397   PetscInt       nzlocal=a->nz+b->nz;
2398   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2399   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2400   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2401   MatScalar      *aworkA,*aworkB;
2402   PetscScalar    *vals;
2403   PetscMPIInt    tag1,tag2,tag3,imdex;
2404   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2405   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2406   MPI_Status     recv_status,*send_status;
2407   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2408   PetscInt       **rbuf_j=NULL;
2409   PetscScalar    **rbuf_a=NULL;
2410   Mat_Redundant  *redund =NULL;
2411 
2412   PetscFunctionBegin;
2413   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2414   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2416   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2417   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2418 
2419   if (reuse == MAT_REUSE_MATRIX) {
2420     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2421     if (subsize == 1) {
2422       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2423       redund = c->redundant;
2424     } else {
2425       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2426       redund = c->redundant;
2427     }
2428     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2429 
2430     nsends    = redund->nsends;
2431     nrecvs    = redund->nrecvs;
2432     send_rank = redund->send_rank;
2433     recv_rank = redund->recv_rank;
2434     sbuf_nz   = redund->sbuf_nz;
2435     rbuf_nz   = redund->rbuf_nz;
2436     sbuf_j    = redund->sbuf_j;
2437     sbuf_a    = redund->sbuf_a;
2438     rbuf_j    = redund->rbuf_j;
2439     rbuf_a    = redund->rbuf_a;
2440   }
2441 
2442   if (reuse == MAT_INITIAL_MATRIX) {
2443     PetscInt    nleftover,np_subcomm;
2444 
2445     /* determine the destination ranks (send_rank), nsends and nrecvs */
2446     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2447 
2448     np_subcomm = size/nsubcomm;
2449     nleftover  = size - nsubcomm*np_subcomm;
2450 
2451     /* the block of code below is specific to the INTERLACED subcommunicator layout */
2452     /* ------------------------------------------------*/
2453     nsends = 0; nrecvs = 0;
2454     for (i=0; i<size; i++) {
2455       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2456         send_rank[nsends++] = i;
2457         recv_rank[nrecvs++] = i;
2458       }
2459     }
2460     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2461       i = size-nleftover-1;
2462       j = 0;
2463       while (j < nsubcomm - nleftover) {
2464         send_rank[nsends++] = i;
2465         i--; j++;
2466       }
2467     }
2468 
2469     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2470       for (i=0; i<nleftover; i++) {
2471         recv_rank[nrecvs++] = size-nleftover+i;
2472       }
2473     }
2474     /*----------------------------------------------*/
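    /* Worked example (illustration only, assuming the INTERLACED layout where global rank r
       has subrank r/nsubcomm): with size = 6 and nsubcomm = 2 the subcommunicators are
       {0,2,4} and {1,3,5}; ranks sharing a subrank exchange rows pairwise, i.e. 0 <-> 1,
       2 <-> 3, 4 <-> 5, so every subcommunicator ends up holding the entire matrix. */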
2475 
2476     /* allocate sbuf_j, sbuf_a */
2477     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2478     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2479     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2480     /*
2481     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2482     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2483      */
2484   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2485 
2486   /* copy mat's local entries into the buffers */
2487   if (reuse == MAT_INITIAL_MATRIX) {
2488     rownz_max = 0;
2489     rptr      = sbuf_j;
2490     cols      = sbuf_j + rend-rstart + 1;
2491     vals      = sbuf_a;
2492     rptr[0]   = 0;
2493     for (i=0; i<rend-rstart; i++) {
2494       row    = i + rstart;
2495       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2496       ncols  = nzA + nzB;
2497       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2498       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2499       /* load the column indices for this row into cols */
2500       lwrite = 0;
2501       for (l=0; l<nzB; l++) {
2502         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2503           vals[lwrite]   = aworkB[l];
2504           cols[lwrite++] = ctmp;
2505         }
2506       }
2507       for (l=0; l<nzA; l++) {
2508         vals[lwrite]   = aworkA[l];
2509         cols[lwrite++] = cstart + cworkA[l];
2510       }
2511       for (l=0; l<nzB; l++) {
2512         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2513           vals[lwrite]   = aworkB[l];
2514           cols[lwrite++] = ctmp;
2515         }
2516       }
2517       vals     += ncols;
2518       cols     += ncols;
2519       rptr[i+1] = rptr[i] + ncols;
2520       if (rownz_max < ncols) rownz_max = ncols;
2521     }
2522     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2523   } else { /* only copy matrix values into sbuf_a */
2524     rptr    = sbuf_j;
2525     vals    = sbuf_a;
2526     rptr[0] = 0;
2527     for (i=0; i<rend-rstart; i++) {
2528       row    = i + rstart;
2529       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2530       ncols  = nzA + nzB;
2531       cworkB = b->j + b->i[i];
2532       aworkA = a->a + a->i[i];
2533       aworkB = b->a + b->i[i];
2534       lwrite = 0;
2535       for (l=0; l<nzB; l++) {
2536         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2537       }
2538       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2539       for (l=0; l<nzB; l++) {
2540         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2541       }
2542       vals     += ncols;
2543       rptr[i+1] = rptr[i] + ncols;
2544     }
2545   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2546 
2547   /* send nzlocal to others, and recv other's nzlocal */
2548   /*--------------------------------------------------*/
2549   if (reuse == MAT_INITIAL_MATRIX) {
2550     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2551 
2552     s_waits2 = s_waits3 + nsends;
2553     s_waits1 = s_waits2 + nsends;
2554     r_waits1 = s_waits1 + nsends;
2555     r_waits2 = r_waits1 + nrecvs;
2556     r_waits3 = r_waits2 + nrecvs;
2557   } else {
2558     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2559 
2560     r_waits3 = s_waits3 + nsends;
2561   }
2562 
2563   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2564   if (reuse == MAT_INITIAL_MATRIX) {
2565     /* get new tags to keep the communication clean */
2566     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2567     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2568     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2569 
2570     /* post receives of other's nzlocal */
2571     for (i=0; i<nrecvs; i++) {
2572       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2573     }
2574     /* send nzlocal to others */
2575     for (i=0; i<nsends; i++) {
2576       sbuf_nz[i] = nzlocal;
2577       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2578     }
2579     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2580     count = nrecvs;
2581     while (count) {
2582       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2583 
2584       recv_rank[imdex] = recv_status.MPI_SOURCE;
2585       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2586       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2587 
2588       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2589 
2590       rbuf_nz[imdex] += i + 2;
2591 
2592       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2593       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2594       count--;
2595     }
2596     /* wait on sends of nzlocal */
2597     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2598     /* send mat->i,j to others, and recv from others */
2599     /*------------------------------------------------*/
2600     for (i=0; i<nsends; i++) {
2601       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2602       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2603     }
2604     /* wait on receives of mat->i,j */
2605     /*------------------------------*/
2606     count = nrecvs;
2607     while (count) {
2608       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2609       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2610       count--;
2611     }
2612     /* wait on sends of mat->i,j */
2613     /*---------------------------*/
2614     if (nsends) {
2615       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2616     }
2617   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2618 
2619   /* post receives, send and receive mat->a */
2620   /*----------------------------------------*/
2621   for (imdex=0; imdex<nrecvs; imdex++) {
2622     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2623   }
2624   for (i=0; i<nsends; i++) {
2625     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2626   }
2627   count = nrecvs;
2628   while (count) {
2629     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2630     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2631     count--;
2632   }
2633   if (nsends) {
2634     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2635   }
2636 
2637   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2638 
2639   /* create redundant matrix */
2640   /*-------------------------*/
2641   if (reuse == MAT_INITIAL_MATRIX) {
2642     const PetscInt *range;
2643     PetscInt       rstart_sub,rend_sub,mloc_sub;
2644 
2645     /* compute rownz_max for preallocation */
2646     for (imdex=0; imdex<nrecvs; imdex++) {
2647       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2648       rptr = rbuf_j[imdex];
2649       for (i=0; i<j; i++) {
2650         ncols = rptr[i+1] - rptr[i];
2651         if (rownz_max < ncols) rownz_max = ncols;
2652       }
2653     }
2654 
2655     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2656 
2657     /* get local size of redundant matrix
2658        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED, works for other types, but may not be efficient! */
2659     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2660     rstart_sub = range[nsubcomm*subrank];
2661     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2662       rend_sub = range[nsubcomm*(subrank+1)];
2663     } else {
2664       rend_sub = mat->rmap->N;
2665     }
2666     mloc_sub = rend_sub - rstart_sub;
2667 
2668     if (M == N) {
2669       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2670     } else { /* non-square matrix */
2671       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2672     }
2673     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2674     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2675     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2676     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2677   } else {
2678     C = *matredundant;
2679   }
2680 
2681   /* insert local matrix entries */
2682   rptr = sbuf_j;
2683   cols = sbuf_j + rend-rstart + 1;
2684   vals = sbuf_a;
2685   for (i=0; i<rend-rstart; i++) {
2686     row   = i + rstart;
2687     ncols = rptr[i+1] - rptr[i];
2688     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2689     vals += ncols;
2690     cols += ncols;
2691   }
2692   /* insert received matrix entries */
2693   for (imdex=0; imdex<nrecvs; imdex++) {
2694     rstart = rowrange[recv_rank[imdex]];
2695     rend   = rowrange[recv_rank[imdex]+1];
2696     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2697     rptr   = rbuf_j[imdex];
2698     cols   = rbuf_j[imdex] + rend-rstart + 1;
2699     vals   = rbuf_a[imdex];
2700     for (i=0; i<rend-rstart; i++) {
2701       row   = i + rstart;
2702       ncols = rptr[i+1] - rptr[i];
2703       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2704       vals += ncols;
2705       cols += ncols;
2706     }
2707   }
2708   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2709   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2710 
2711   if (reuse == MAT_INITIAL_MATRIX) {
2712     *matredundant = C;
2713 
2714     /* create a supporting struct and attach it to C for reuse */
2715     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2716     if (subsize == 1) {
2717       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2718       c->redundant = redund;
2719     } else {
2720       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2721       c->redundant = redund;
2722     }
2723 
2724     redund->nzlocal   = nzlocal;
2725     redund->nsends    = nsends;
2726     redund->nrecvs    = nrecvs;
2727     redund->send_rank = send_rank;
2728     redund->recv_rank = recv_rank;
2729     redund->sbuf_nz   = sbuf_nz;
2730     redund->rbuf_nz   = rbuf_nz;
2731     redund->sbuf_j    = sbuf_j;
2732     redund->sbuf_a    = sbuf_a;
2733     redund->rbuf_j    = rbuf_j;
2734     redund->rbuf_a    = rbuf_a;
2735     redund->psubcomm  = NULL;
2736   }
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2742 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2743 {
2744   PetscErrorCode ierr;
2745   MPI_Comm       comm;
2746   PetscMPIInt    size,subsize;
2747   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2748   Mat_Redundant  *redund=NULL;
2749   PetscSubcomm   psubcomm=NULL;
2750   MPI_Comm       subcomm_in=subcomm;
2751   Mat            *matseq;
2752   IS             isrow,iscol;
2753 
2754   PetscFunctionBegin;
2755   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2756     if (reuse ==  MAT_INITIAL_MATRIX) {
2757       /* create psubcomm, then get subcomm */
2758       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2759       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2760       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2761 
2762       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2763       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2764       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2765       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2766       subcomm = psubcomm->comm;
2767     } else { /* retrieve psubcomm and subcomm */
2768       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2769       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2770       if (subsize == 1) {
2771         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2772         redund = c->redundant;
2773       } else {
2774         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2775         redund = c->redundant;
2776       }
2777       psubcomm = redund->psubcomm;
2778     }
2779     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2780       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2781       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2782         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2783         if (subsize == 1) {
2784           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2785           c->redundant->psubcomm = psubcomm;
2786         } else {
2787           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2788           c->redundant->psubcomm = psubcomm;
2789         }
2790       }
2791       PetscFunctionReturn(0);
2792     }
2793   }
2794 
2795   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2796   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2797   if (reuse == MAT_INITIAL_MATRIX) {
2798     /* create a local sequential matrix matseq[0] */
2799     mloc_sub = PETSC_DECIDE;
2800     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2801     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2802     rstart = rend - mloc_sub;
2803     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2804     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2805   } else { /* reuse == MAT_REUSE_MATRIX */
2806     if (subsize == 1) {
2807       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2808       redund = c->redundant;
2809     } else {
2810       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2811       redund = c->redundant;
2812     }
2813 
2814     isrow  = redund->isrow;
2815     iscol  = redund->iscol;
2816     matseq = redund->matseq;
2817   }
2818   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2819   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2820 
2821   if (reuse == MAT_INITIAL_MATRIX) {
2822     /* create a supporting struct and attach it to C for reuse */
2823     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2824     if (subsize == 1) {
2825       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2826       c->redundant = redund;
2827     } else {
2828       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2829       c->redundant = redund;
2830     }
2831     redund->isrow    = isrow;
2832     redund->iscol    = iscol;
2833     redund->matseq   = matseq;
2834     redund->psubcomm = psubcomm;
2835   }
2836   PetscFunctionReturn(0);
2837 }
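/*
   A hedged usage sketch (illustration only, not part of the library): obtaining
   nsubcomm redundant copies of an assembled MPIAIJ matrix through the public
   MatGetRedundantMatrix() interface, whose argument list mirrors the routine
   above.  The names A and Ared are placeholders; passing MPI_COMM_NULL lets
   PETSc build the subcommunicators itself, and the MAT_REUSE_MATRIX call
   refreshes Ared after the numerical values of A have changed.

     Mat Ared;
     ierr = MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_INITIAL_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatGetRedundantMatrix(A,2,MPI_COMM_NULL,MAT_REUSE_MATRIX,&Ared);CHKERRQ(ierr);
     ierr = MatDestroy(&Ared);CHKERRQ(ierr);
*/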
2838 
2839 #undef __FUNCT__
2840 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2841 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2842 {
2843   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2844   PetscErrorCode ierr;
2845   PetscInt       i,*idxb = 0;
2846   PetscScalar    *va,*vb;
2847   Vec            vtmp;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2851   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2852   if (idx) {
2853     for (i=0; i<A->rmap->n; i++) {
2854       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2855     }
2856   }
2857 
2858   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2859   if (idx) {
2860     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2861   }
2862   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2863   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2864 
2865   for (i=0; i<A->rmap->n; i++) {
2866     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2867       va[i] = vb[i];
2868       if (idx) idx[i] = a->garray[idxb[i]];
2869     }
2870   }
2871 
2872   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2873   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2874   ierr = PetscFree(idxb);CHKERRQ(ierr);
2875   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2876   PetscFunctionReturn(0);
2877 }
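/*
   A hedged usage sketch (illustration only): v must share A's parallel row
   layout, and idx, when provided, receives the global column index of each
   row's largest-magnitude entry.  MatGetVecs() is assumed here to supply a
   compatibly laid-out vector; nlocalrows stands for A's local row count.

     Vec      v;
     PetscInt *idx;
     ierr = MatGetVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = PetscMalloc1(nlocalrows,&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
     ierr = PetscFree(idx);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/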
2878 
2879 #undef __FUNCT__
2880 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2881 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2882 {
2883   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2884   PetscErrorCode ierr;
2885   PetscInt       i,*idxb = 0;
2886   PetscScalar    *va,*vb;
2887   Vec            vtmp;
2888 
2889   PetscFunctionBegin;
2890   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2891   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2892   if (idx) {
2893     for (i=0; i<A->rmap->n; i++) {
2894       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2895     }
2896   }
2897 
2898   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2899   if (idx) {
2900     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2901   }
2902   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2903   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2904 
2905   for (i=0; i<A->rmap->n; i++) {
2906     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2907       va[i] = vb[i];
2908       if (idx) idx[i] = a->garray[idxb[i]];
2909     }
2910   }
2911 
2912   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2913   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2914   ierr = PetscFree(idxb);CHKERRQ(ierr);
2915   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 #undef __FUNCT__
2920 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2921 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2922 {
2923   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2924   PetscInt       n      = A->rmap->n;
2925   PetscInt       cstart = A->cmap->rstart;
2926   PetscInt       *cmap  = mat->garray;
2927   PetscInt       *diagIdx, *offdiagIdx;
2928   Vec            diagV, offdiagV;
2929   PetscScalar    *a, *diagA, *offdiagA;
2930   PetscInt       r;
2931   PetscErrorCode ierr;
2932 
2933   PetscFunctionBegin;
2934   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2935   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2936   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2937   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2938   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2939   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2940   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2941   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2942   for (r = 0; r < n; ++r) {
2943     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2944       a[r]   = diagA[r];
2945       idx[r] = cstart + diagIdx[r];
2946     } else {
2947       a[r]   = offdiagA[r];
2948       idx[r] = cmap[offdiagIdx[r]];
2949     }
2950   }
2951   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2952   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2953   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2954   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2955   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2956   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2957   PetscFunctionReturn(0);
2958 }
2959 
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2962 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2963 {
2964   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2965   PetscInt       n      = A->rmap->n;
2966   PetscInt       cstart = A->cmap->rstart;
2967   PetscInt       *cmap  = mat->garray;
2968   PetscInt       *diagIdx, *offdiagIdx;
2969   Vec            diagV, offdiagV;
2970   PetscScalar    *a, *diagA, *offdiagA;
2971   PetscInt       r;
2972   PetscErrorCode ierr;
2973 
2974   PetscFunctionBegin;
2975   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2976   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2977   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2978   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2979   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2980   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2981   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2982   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2983   for (r = 0; r < n; ++r) {
2984     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2985       a[r]   = diagA[r];
2986       idx[r] = cstart + diagIdx[r];
2987     } else {
2988       a[r]   = offdiagA[r];
2989       idx[r] = cmap[offdiagIdx[r]];
2990     }
2991   }
2992   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2993   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2994   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2995   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2996   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2997   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2998   PetscFunctionReturn(0);
2999 }
3000 
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3003 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3004 {
3005   PetscErrorCode ierr;
3006   Mat            *dummy;
3007 
3008   PetscFunctionBegin;
3009   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3010   *newmat = *dummy;
3011   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3017 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3018 {
3019   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3020   PetscErrorCode ierr;
3021 
3022   PetscFunctionBegin;
3023   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3024   PetscFunctionReturn(0);
3025 }
3026 
3027 #undef __FUNCT__
3028 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3029 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3030 {
3031   PetscErrorCode ierr;
3032   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3033 
3034   PetscFunctionBegin;
3035   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3036   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3037   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3038   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3039   PetscFunctionReturn(0);
3040 }
3041 
3042 /* -------------------------------------------------------------------*/
3043 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3044                                        MatGetRow_MPIAIJ,
3045                                        MatRestoreRow_MPIAIJ,
3046                                        MatMult_MPIAIJ,
3047                                 /* 4*/ MatMultAdd_MPIAIJ,
3048                                        MatMultTranspose_MPIAIJ,
3049                                        MatMultTransposeAdd_MPIAIJ,
3050 #if defined(PETSC_HAVE_PBGL)
3051                                        MatSolve_MPIAIJ,
3052 #else
3053                                        0,
3054 #endif
3055                                        0,
3056                                        0,
3057                                 /*10*/ 0,
3058                                        0,
3059                                        0,
3060                                        MatSOR_MPIAIJ,
3061                                        MatTranspose_MPIAIJ,
3062                                 /*15*/ MatGetInfo_MPIAIJ,
3063                                        MatEqual_MPIAIJ,
3064                                        MatGetDiagonal_MPIAIJ,
3065                                        MatDiagonalScale_MPIAIJ,
3066                                        MatNorm_MPIAIJ,
3067                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3068                                        MatAssemblyEnd_MPIAIJ,
3069                                        MatSetOption_MPIAIJ,
3070                                        MatZeroEntries_MPIAIJ,
3071                                 /*24*/ MatZeroRows_MPIAIJ,
3072                                        0,
3073 #if defined(PETSC_HAVE_PBGL)
3074                                        0,
3075 #else
3076                                        0,
3077 #endif
3078                                        0,
3079                                        0,
3080                                 /*29*/ MatSetUp_MPIAIJ,
3081 #if defined(PETSC_HAVE_PBGL)
3082                                        0,
3083 #else
3084                                        0,
3085 #endif
3086                                        0,
3087                                        0,
3088                                        0,
3089                                 /*34*/ MatDuplicate_MPIAIJ,
3090                                        0,
3091                                        0,
3092                                        0,
3093                                        0,
3094                                 /*39*/ MatAXPY_MPIAIJ,
3095                                        MatGetSubMatrices_MPIAIJ,
3096                                        MatIncreaseOverlap_MPIAIJ,
3097                                        MatGetValues_MPIAIJ,
3098                                        MatCopy_MPIAIJ,
3099                                 /*44*/ MatGetRowMax_MPIAIJ,
3100                                        MatScale_MPIAIJ,
3101                                        0,
3102                                        MatDiagonalSet_MPIAIJ,
3103                                        MatZeroRowsColumns_MPIAIJ,
3104                                 /*49*/ MatSetRandom_MPIAIJ,
3105                                        0,
3106                                        0,
3107                                        0,
3108                                        0,
3109                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3110                                        0,
3111                                        MatSetUnfactored_MPIAIJ,
3112                                        MatPermute_MPIAIJ,
3113                                        0,
3114                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3115                                        MatDestroy_MPIAIJ,
3116                                        MatView_MPIAIJ,
3117                                        0,
3118                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3119                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3120                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3121                                        0,
3122                                        0,
3123                                        0,
3124                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3125                                        MatGetRowMinAbs_MPIAIJ,
3126                                        0,
3127                                        MatSetColoring_MPIAIJ,
3128                                        0,
3129                                        MatSetValuesAdifor_MPIAIJ,
3130                                 /*75*/ MatFDColoringApply_AIJ,
3131                                        0,
3132                                        0,
3133                                        0,
3134                                        MatFindZeroDiagonals_MPIAIJ,
3135                                 /*80*/ 0,
3136                                        0,
3137                                        0,
3138                                 /*83*/ MatLoad_MPIAIJ,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                        0,
3143                                        0,
3144                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3145                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3146                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3147                                        MatPtAP_MPIAIJ_MPIAIJ,
3148                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3149                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3150                                        0,
3151                                        0,
3152                                        0,
3153                                        0,
3154                                 /*99*/ 0,
3155                                        0,
3156                                        0,
3157                                        MatConjugate_MPIAIJ,
3158                                        0,
3159                                 /*104*/MatSetValuesRow_MPIAIJ,
3160                                        MatRealPart_MPIAIJ,
3161                                        MatImaginaryPart_MPIAIJ,
3162                                        0,
3163                                        0,
3164                                 /*109*/0,
3165                                        MatGetRedundantMatrix_MPIAIJ,
3166                                        MatGetRowMin_MPIAIJ,
3167                                        0,
3168                                        0,
3169                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3170                                        0,
3171                                        0,
3172                                        0,
3173                                        0,
3174                                 /*119*/0,
3175                                        0,
3176                                        0,
3177                                        0,
3178                                        MatGetMultiProcBlock_MPIAIJ,
3179                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3180                                        MatGetColumnNorms_MPIAIJ,
3181                                        MatInvertBlockDiagonal_MPIAIJ,
3182                                        0,
3183                                        MatGetSubMatricesParallel_MPIAIJ,
3184                                 /*129*/0,
3185                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3186                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3187                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3188                                        0,
3189                                 /*134*/0,
3190                                        0,
3191                                        0,
3192                                        0,
3193                                        0,
3194                                 /*139*/0,
3195                                        0,
3196                                        0,
3197                                        MatFDColoringSetUp_MPIXAIJ
3198 };
3199 
3200 /* ----------------------------------------------------------------------------------------*/
3201 
3202 #undef __FUNCT__
3203 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3204 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3205 {
3206   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3207   PetscErrorCode ierr;
3208 
3209   PetscFunctionBegin;
3210   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3211   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3212   PetscFunctionReturn(0);
3213 }
3214 
3215 #undef __FUNCT__
3216 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3217 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3218 {
3219   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3220   PetscErrorCode ierr;
3221 
3222   PetscFunctionBegin;
3223   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3224   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3225   PetscFunctionReturn(0);
3226 }
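/*
   A hedged usage sketch (illustration only): the store/retrieve pair above is
   typically used when a matrix with a fixed nonzero pattern is refilled many
   times, e.g. inside a nonlinear or time-stepping loop.  A is a placeholder for
   an assembled MPIAIJ matrix; freezing the nonzero structure first is assumed
   to be required, as in the sequential case.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
        ... modify the numerical values of A, keeping the same pattern ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/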
3227 
3228 #undef __FUNCT__
3229 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3230 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3231 {
3232   Mat_MPIAIJ     *b;
3233   PetscErrorCode ierr;
3234 
3235   PetscFunctionBegin;
3236   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3237   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3238   b = (Mat_MPIAIJ*)B->data;
3239 
3240   if (!B->preallocated) {
3241     /* Explicitly create 2 MATSEQAIJ matrices. */
3242     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3243     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3244     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3245     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3246     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3247     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3248     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3249     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3250     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3251     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3252   }
3253 
3254   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3255   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3256   B->preallocated = PETSC_TRUE;
3257   PetscFunctionReturn(0);
3258 }
3259 
3260 #undef __FUNCT__
3261 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3262 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3263 {
3264   Mat            mat;
3265   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3266   PetscErrorCode ierr;
3267 
3268   PetscFunctionBegin;
3269   *newmat = 0;
3270   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3271   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3272   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3273   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3274   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3275   a       = (Mat_MPIAIJ*)mat->data;
3276 
3277   mat->factortype   = matin->factortype;
3278   mat->assembled    = PETSC_TRUE;
3279   mat->insertmode   = NOT_SET_VALUES;
3280   mat->preallocated = PETSC_TRUE;
3281 
3282   a->size         = oldmat->size;
3283   a->rank         = oldmat->rank;
3284   a->donotstash   = oldmat->donotstash;
3285   a->roworiented  = oldmat->roworiented;
3286   a->rowindices   = 0;
3287   a->rowvalues    = 0;
3288   a->getrowactive = PETSC_FALSE;
3289 
3290   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3291   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3292 
3293   if (oldmat->colmap) {
3294 #if defined(PETSC_USE_CTABLE)
3295     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3296 #else
3297     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3298     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3299     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3300 #endif
3301   } else a->colmap = 0;
3302   if (oldmat->garray) {
3303     PetscInt len;
3304     len  = oldmat->B->cmap->n;
3305     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3306     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3307     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3308   } else a->garray = 0;
3309 
3310   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3311   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3312   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3313   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3314   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3315   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3316   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3317   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3318   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3319   *newmat = mat;
3320   PetscFunctionReturn(0);
3321 }
3322 
3323 
3324 
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatLoad_MPIAIJ"
3327 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3328 {
3329   PetscScalar    *vals,*svals;
3330   MPI_Comm       comm;
3331   PetscErrorCode ierr;
3332   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3333   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3334   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3335   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3336   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3337   int            fd;
3338   PetscInt       bs = 1;
3339 
3340   PetscFunctionBegin;
3341   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3342   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3343   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3344   if (!rank) {
3345     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3346     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3347     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3348   }
3349 
3350   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3351   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3352   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3353 
3354   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3355 
3356   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3357   M    = header[1]; N = header[2];
3358   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3359   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3360   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3361 
3362   /* If global sizes are set, check if they are consistent with that given in the file */
3363   if (sizesset) {
3364     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3365   }
3366   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3367   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3368 
3369   /* determine ownership of all (block) rows */
3370   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3371   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3372   else m = newMat->rmap->n; /* Set by user */
3373 
3374   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3375   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3376 
3377   /* First process needs enough room for process with most rows */
3378   if (!rank) {
3379     mmax = rowners[1];
3380     for (i=2; i<=size; i++) {
3381       mmax = PetscMax(mmax, rowners[i]);
3382     }
3383   } else mmax = -1;             /* unused, but compilers complain */
3384 
3385   rowners[0] = 0;
3386   for (i=2; i<=size; i++) {
3387     rowners[i] += rowners[i-1];
3388   }
3389   rstart = rowners[rank];
3390   rend   = rowners[rank+1];
3391 
3392   /* distribute row lengths to all processors */
3393   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3394   if (!rank) {
3395     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3396     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3397     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3398     for (j=0; j<m; j++) {
3399       procsnz[0] += ourlens[j];
3400     }
3401     for (i=1; i<size; i++) {
3402       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3403       /* calculate the number of nonzeros on each processor */
3404       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3405         procsnz[i] += rowlengths[j];
3406       }
3407       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3408     }
3409     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3410   } else {
3411     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3412   }
3413 
3414   if (!rank) {
3415     /* determine max buffer needed and allocate it */
3416     maxnz = 0;
3417     for (i=0; i<size; i++) {
3418       maxnz = PetscMax(maxnz,procsnz[i]);
3419     }
3420     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3421 
3422     /* read in my part of the matrix column indices  */
3423     nz   = procsnz[0];
3424     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3425     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3426 
3427     /* read in everyone else's and ship off */
3428     for (i=1; i<size; i++) {
3429       nz   = procsnz[i];
3430       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3431       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3432     }
3433     ierr = PetscFree(cols);CHKERRQ(ierr);
3434   } else {
3435     /* determine buffer space needed for message */
3436     nz = 0;
3437     for (i=0; i<m; i++) {
3438       nz += ourlens[i];
3439     }
3440     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3441 
3442     /* receive message of column indices*/
3443     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3444   }
3445 
3446   /* determine column ownership if matrix is not square */
3447   if (N != M) {
3448     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3449     else n = newMat->cmap->n;
3450     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3451     cstart = cend - n;
3452   } else {
3453     cstart = rstart;
3454     cend   = rend;
3455     n      = cend - cstart;
3456   }
3457 
3458   /* loop over local rows, determining number of off diagonal entries */
3459   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3460   jj   = 0;
3461   for (i=0; i<m; i++) {
3462     for (j=0; j<ourlens[i]; j++) {
3463       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3464       jj++;
3465     }
3466   }
3467 
3468   for (i=0; i<m; i++) {
3469     ourlens[i] -= offlens[i];
3470   }
3471   if (!sizesset) {
3472     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3473   }
3474 
3475   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3476 
3477   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3478 
3479   for (i=0; i<m; i++) {
3480     ourlens[i] += offlens[i];
3481   }
3482 
3483   if (!rank) {
3484     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3485 
3486     /* read in my part of the matrix numerical values  */
3487     nz   = procsnz[0];
3488     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3489 
3490     /* insert into matrix */
3491     jj      = rstart;
3492     smycols = mycols;
3493     svals   = vals;
3494     for (i=0; i<m; i++) {
3495       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3496       smycols += ourlens[i];
3497       svals   += ourlens[i];
3498       jj++;
3499     }
3500 
3501     /* read in other processors and ship out */
3502     for (i=1; i<size; i++) {
3503       nz   = procsnz[i];
3504       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3505       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3506     }
3507     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3508   } else {
3509     /* receive numeric values */
3510     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3511 
3512     /* receive message of values*/
3513     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3514 
3515     /* insert into matrix */
3516     jj      = rstart;
3517     smycols = mycols;
3518     svals   = vals;
3519     for (i=0; i<m; i++) {
3520       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3521       smycols += ourlens[i];
3522       svals   += ourlens[i];
3523       jj++;
3524     }
3525   }
3526   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3527   ierr = PetscFree(vals);CHKERRQ(ierr);
3528   ierr = PetscFree(mycols);CHKERRQ(ierr);
3529   ierr = PetscFree(rowners);CHKERRQ(ierr);
3530   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3531   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3532   PetscFunctionReturn(0);
3533 }
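/*
   A minimal sketch (illustration only) of how this loader is normally reached:
   MatLoad() dispatches here when the matrix type is MATMPIAIJ and the viewer is
   a PETSc binary viewer.  The file name "matrix.dat" is a placeholder.

     Mat         A;
     PetscViewer viewer;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/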
3534 
3535 #undef __FUNCT__
3536 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3537 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3538 {
3539   PetscErrorCode ierr;
3540   IS             iscol_local;
3541   PetscInt       csize;
3542 
3543   PetscFunctionBegin;
3544   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3545   if (call == MAT_REUSE_MATRIX) {
3546     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3547     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3548   } else {
3549     PetscInt cbs;
3550     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3551     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3552     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3553   }
3554   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3555   if (call == MAT_INITIAL_MATRIX) {
3556     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3557     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3558   }
3559   PetscFunctionReturn(0);
3560 }
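/*
   A hedged usage sketch (illustration only): extracting a parallel submatrix
   through the public MatGetSubMatrix() interface, which lands in the routine
   above for MPIAIJ matrices.  Each process lists the rows and columns it wants
   to own in the result; nrows_local, ncols_local, first_row and first_col are
   placeholders.

     IS  isrow,iscol;
     Mat Asub;
     ierr = ISCreateStride(PETSC_COMM_WORLD,nrows_local,first_row,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols_local,first_col,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/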
3561 
3562 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3563 #undef __FUNCT__
3564 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3565 /*
3566     Not great since it makes two copies of the submatrix: first a SeqAIJ
3567   on each process, and then the end result by concatenating the local matrices.
3568   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3569 
3570   Note: This requires a sequential iscol with all indices.
3571 */
3572 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3573 {
3574   PetscErrorCode ierr;
3575   PetscMPIInt    rank,size;
3576   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3577   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3578   PetscBool      allcolumns, colflag;
3579   Mat            M,Mreuse;
3580   MatScalar      *vwork,*aa;
3581   MPI_Comm       comm;
3582   Mat_SeqAIJ     *aij;
3583 
3584   PetscFunctionBegin;
3585   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3586   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3588 
3589   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3590   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3591   if (colflag && ncol == mat->cmap->N) {
3592     allcolumns = PETSC_TRUE;
3593   } else {
3594     allcolumns = PETSC_FALSE;
3595   }
3596   if (call ==  MAT_REUSE_MATRIX) {
3597     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3598     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3599     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3600   } else {
3601     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3602   }
3603 
3604   /*
3605       m - number of local rows
3606       n - number of columns (same on all processors)
3607       rstart - first row in new global matrix generated
3608   */
3609   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3610   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3611   if (call == MAT_INITIAL_MATRIX) {
3612     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3613     ii  = aij->i;
3614     jj  = aij->j;
3615 
3616     /*
3617         Determine the number of non-zeros in the diagonal and off-diagonal
3618         portions of the matrix in order to do correct preallocation
3619     */
3620 
3621     /* first get start and end of "diagonal" columns */
3622     if (csize == PETSC_DECIDE) {
3623       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3624       if (mglobal == n) { /* square matrix */
3625         nlocal = m;
3626       } else {
3627         nlocal = n/size + ((n % size) > rank);
3628       }
3629     } else {
3630       nlocal = csize;
3631     }
3632     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3633     rstart = rend - nlocal;
3634     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3635 
3636     /* next, compute all the lengths */
3637     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3638     olens = dlens + m;
3639     for (i=0; i<m; i++) {
3640       jend = ii[i+1] - ii[i];
3641       olen = 0;
3642       dlen = 0;
3643       for (j=0; j<jend; j++) {
3644         if (*jj < rstart || *jj >= rend) olen++;
3645         else dlen++;
3646         jj++;
3647       }
3648       olens[i] = olen;
3649       dlens[i] = dlen;
3650     }
3651     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3652     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3653     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3654     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3655     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3656     ierr = PetscFree(dlens);CHKERRQ(ierr);
3657   } else {
3658     PetscInt ml,nl;
3659 
3660     M    = *newmat;
3661     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3662     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3663     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3664     /*
3665          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3666        rather than the slower MatSetValues().
3667     */
3668     M->was_assembled = PETSC_TRUE;
3669     M->assembled     = PETSC_FALSE;
3670   }
3671   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3672   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3673   ii   = aij->i;
3674   jj   = aij->j;
3675   aa   = aij->a;
3676   for (i=0; i<m; i++) {
3677     row   = rstart + i;
3678     nz    = ii[i+1] - ii[i];
3679     cwork = jj;     jj += nz;
3680     vwork = aa;     aa += nz;
3681     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3682   }
3683 
3684   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3686   *newmat = M;
3687 
3688   /* save submatrix used in processor for next request */
3689   if (call ==  MAT_INITIAL_MATRIX) {
3690     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3691     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3692   }
3693   PetscFunctionReturn(0);
3694 }
3695 
3696 #undef __FUNCT__
3697 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3698 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3699 {
3700   PetscInt       m,cstart, cend,j,nnz,i,d;
3701   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3702   const PetscInt *JJ;
3703   PetscScalar    *values;
3704   PetscErrorCode ierr;
3705 
3706   PetscFunctionBegin;
3707   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3708 
3709   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3710   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3711   m      = B->rmap->n;
3712   cstart = B->cmap->rstart;
3713   cend   = B->cmap->rend;
3714   rstart = B->rmap->rstart;
3715 
3716   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3717 
3718 #if defined(PETSC_USE_DEBUG)
3719   for (i=0; i<m; i++) {
3720     nnz = Ii[i+1]- Ii[i];
3721     JJ  = J + Ii[i];
3722     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3723     if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3724     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3725   }
3726 #endif
3727 
3728   for (i=0; i<m; i++) {
3729     nnz     = Ii[i+1]- Ii[i];
3730     JJ      = J + Ii[i];
3731     nnz_max = PetscMax(nnz_max,nnz);
3732     d       = 0;
3733     for (j=0; j<nnz; j++) {
3734       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3735     }
3736     d_nnz[i] = d;
3737     o_nnz[i] = nnz - d;
3738   }
3739   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3740   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3741 
3742   if (v) values = (PetscScalar*)v;
3743   else {
3744     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3745   }
3746 
3747   for (i=0; i<m; i++) {
3748     ii   = i + rstart;
3749     nnz  = Ii[i+1]- Ii[i];
3750     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3751   }
3752   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3753   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3754 
3755   if (!v) {
3756     ierr = PetscFree(values);CHKERRQ(ierr);
3757   }
3758   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3759   PetscFunctionReturn(0);
3760 }
3761 
3762 #undef __FUNCT__
3763 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3764 /*@
3765    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3766    (the default parallel PETSc format).
3767 
3768    Collective on MPI_Comm
3769 
3770    Input Parameters:
3771 +  B - the matrix
3772 .  i - the indices into j for the start of each local row (starts with zero)
3773 .  j - the column indices for each local row (starts with zero)
3774 -  v - optional values in the matrix
3775 
3776    Level: developer
3777 
3778    Notes:
3779        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3780      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3781      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3782 
3783        The i and j indices are 0 based, and the i indices refer to positions in the local j array.
3784 
3785        The format used for the sparse matrix input is equivalent to a
3786     row-major ordering, i.e. for the following matrix, the input data expected is
3787     as shown:
3788 
3789         1 0 0
3790         2 0 3     P0
3791        -------
3792         4 5 6     P1
3793 
3794      Process0 [P0]: rows_owned=[0,1]
3795         i =  {0,1,3}  [size = nrow+1  = 2+1]
3796         j =  {0,0,2}  [size = nz = 3]
3797         v =  {1,2,3}  [size = nz = 3]
3798 
3799      Process1 [P1]: rows_owned=[2]
3800         i =  {0,3}    [size = nrow+1  = 1+1]
3801         j =  {0,1,2}  [size = nz = 3]
3802         v =  {4,5,6}  [size = nz = 3]
3803 
3804 .keywords: matrix, aij, compressed row, sparse, parallel
3805 
3806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3807           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3808 @*/
3809 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3810 {
3811   PetscErrorCode ierr;
3812 
3813   PetscFunctionBegin;
3814   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3815   PetscFunctionReturn(0);
3816 }
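/*
   A small sketch (illustration only) wiring up the two-process example from the
   man page above.  Each process passes only its own CSR rows; B is assumed to
   be an MPIAIJ matrix created on two processes with local row sizes 2 and 1 and
   3 global columns.

     On process 0:
       PetscInt    i[] = {0,1,3},  j[] = {0,0,2};
       PetscScalar v[] = {1.0,2.0,3.0};
     On process 1:
       PetscInt    i[] = {0,3},    j[] = {0,1,2};
       PetscScalar v[] = {4.0,5.0,6.0};

     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
*/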
3817 
3818 #undef __FUNCT__
3819 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3820 /*@C
3821    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3822    (the default parallel PETSc format).  For good matrix assembly performance
3823    the user should preallocate the matrix storage by setting the parameters
3824    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3825    performance can be increased by more than a factor of 50.
3826 
3827    Collective on MPI_Comm
3828 
3829    Input Parameters:
3830 +  B - the matrix
3831 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3832            (same value is used for all local rows)
3833 .  d_nnz - array containing the number of nonzeros in the various rows of the
3834            DIAGONAL portion of the local submatrix (possibly different for each row)
3835            or NULL, if d_nz is used to specify the nonzero structure.
3836            The size of this array is equal to the number of local rows, i.e 'm'.
3837            For matrices that will be factored, you must leave room for (and set)
3838            the diagonal entry even if it is zero.
3839 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3840            submatrix (same value is used for all local rows).
3841 -  o_nnz - array containing the number of nonzeros in the various rows of the
3842            OFF-DIAGONAL portion of the local submatrix (possibly different for
3843            each row) or NULL, if o_nz is used to specify the nonzero
3844            structure. The size of this array is equal to the number
3845            of local rows, i.e. 'm'.
3846 
3847    If the *_nnz parameter is given then the *_nz parameter is ignored
3848 
3849    The AIJ format (also called the Yale sparse matrix format or
3850    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3851    storage.  The stored row and column indices begin with zero.
3852    See Users-Manual: ch_mat for details.
3853 
3854    The parallel matrix is partitioned such that the first m0 rows belong to
3855    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3856    to process 2, etc., where m0,m1,m2,... are given by the input parameter 'm' on each process.
3857 
3858    The DIAGONAL portion of the local submatrix of a processor can be defined
3859    as the submatrix which is obtained by extracting the part corresponding to
3860    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3861    first row that belongs to the processor, r2 is the last row belonging to
3862    this processor, and c1-c2 is the range of indices of the local part of a
3863    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3864    common case of a square matrix, the row and column ranges are the same and
3865    the DIAGONAL part is also square. The remaining portion of the local
3866    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3867 
3868    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3869 
3870    You can call MatGetInfo() to get information on how effective the preallocation was;
3871    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3872    You can also run with the option -info and look for messages with the string
3873    malloc in them to see if additional memory allocation was needed.
3874 
3875    Example usage:
3876 
3877    Consider the following 8x8 matrix with 34 non-zero values that is
3878    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3879    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3880    as follows:
3881 
3882 .vb
3883             1  2  0  |  0  3  0  |  0  4
3884     Proc0   0  5  6  |  7  0  0  |  8  0
3885             9  0 10  | 11  0  0  | 12  0
3886     -------------------------------------
3887            13  0 14  | 15 16 17  |  0  0
3888     Proc1   0 18  0  | 19 20 21  |  0  0
3889             0  0  0  | 22 23  0  | 24  0
3890     -------------------------------------
3891     Proc2  25 26 27  |  0  0 28  | 29  0
3892            30  0  0  | 31 32 33  |  0 34
3893 .ve
3894 
3895    This can be represented as a collection of submatrices as:
3896 
3897 .vb
3898       A B C
3899       D E F
3900       G H I
3901 .ve
3902 
3903    Where the submatrices A,B,C are owned by proc0, D,E,F are
3904    owned by proc1, G,H,I are owned by proc2.
3905 
3906    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3907    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3908    The 'M','N' parameters are 8,8, and have the same values on all procs.
3909 
3910    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3911    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3912    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3913    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3914    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3915    matrix, and [DF] as another SeqAIJ matrix.
3916 
3917    When d_nz, o_nz parameters are specified, d_nz storage elements are
3918    allocated for every row of the local diagonal submatrix, and o_nz
3919    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3920    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
3921    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3922    In this case, the values of d_nz,o_nz are:
3923 .vb
3924      proc0 : dnz = 2, o_nz = 2
3925      proc1 : dnz = 3, o_nz = 2
3926      proc2 : dnz = 1, o_nz = 4
3927 .ve
3928    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3929    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3930    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3931    34 values.
3932 
3933    When d_nnz, o_nnz parameters are specified, the storage is specified
3934    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3935    In the above case the values for d_nnz,o_nnz are:
3936 .vb
3937      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3938      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3939      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3940 .ve
3941    Here the space allocated is the sum of all the above values, i.e. 34, and
3942    hence pre-allocation is perfect.
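
   For instance, the preallocation call made on proc0 for the example above could be
   written as follows (a sketch; the d_nnz/o_nnz values are taken from the table above):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};   /* proc0's per-row counts */
     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve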
3943 
3944    Level: intermediate
3945 
3946 .keywords: matrix, aij, compressed row, sparse, parallel
3947 
3948 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3949           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3950 @*/
3951 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3952 {
3953   PetscErrorCode ierr;
3954 
3955   PetscFunctionBegin;
3956   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3957   PetscValidType(B,1);
3958   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3959   PetscFunctionReturn(0);
3960 }
3961 
3962 #undef __FUNCT__
3963 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3964 /*@
3965      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
3966          CSR format, the local rows.
3967 
3968    Collective on MPI_Comm
3969 
3970    Input Parameters:
3971 +  comm - MPI communicator
3972 .  m - number of local rows (Cannot be PETSC_DECIDE)
3973 .  n - This value should be the same as the local size used in creating the
3974        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3975        calculated if N is given). For square matrices n is almost always m.
3976 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3977 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3978 .   i - row indices
3979 .   j - column indices
3980 -   a - matrix values
3981 
3982    Output Parameter:
3983 .   mat - the matrix
3984 
3985    Level: intermediate
3986 
3987    Notes:
3988        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3989      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3990      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3991 
3992        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
3993 
3994        The format used for the sparse matrix input is equivalent to a
3995     row-major ordering, i.e. for the following matrix, the input data expected is
3996     as shown:
3997 
3998         1 0 0
3999         2 0 3     P0
4000        -------
4001         4 5 6     P1
4002 
4003      Process0 [P0]: rows_owned=[0,1]
4004         i =  {0,1,3}  [size = nrow+1  = 2+1]
4005         j =  {0,0,2}  [size = nz = 3]
4006         v =  {1,2,3}  [size = nz = 3]
4007 
4008      Process1 [P1]: rows_owned=[2]
4009         i =  {0,3}    [size = nrow+1  = 1+1]
4010         j =  {0,1,2}  [size = nz = 3]
4011         v =  {4,5,6}  [size = nz = 3]
4012 
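     For the two-process example above, the call made on process 0 might look as
     follows (a sketch; i, j, and a hold process 0's local CSR data listed above,
     with a holding the values shown as v):

.vb
     Mat mat;
     ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,2,PETSC_DECIDE,3,3,i,j,a,&mat);CHKERRQ(ierr);
.ve
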
4013 .keywords: matrix, aij, compressed row, sparse, parallel
4014 
4015 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4016           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4017 @*/
4018 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4019 {
4020   PetscErrorCode ierr;
4021 
4022   PetscFunctionBegin;
4023   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4024   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4025   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4026   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4027   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4028   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4029   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4030   PetscFunctionReturn(0);
4031 }
4032 
4033 #undef __FUNCT__
4034 #define __FUNCT__ "MatCreateAIJ"
4035 /*@C
4036    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4037    (the default parallel PETSc format).  For good matrix assembly performance
4038    the user should preallocate the matrix storage by setting the parameters
4039    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4040    performance can be increased by more than a factor of 50.
4041 
4042    Collective on MPI_Comm
4043 
4044    Input Parameters:
4045 +  comm - MPI communicator
4046 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
4047            This value should be the same as the local size used in creating the
4048            y vector for the matrix-vector product y = Ax.
4049 .  n - This value should be the same as the local size used in creating the
4050        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4051        calculated if N is given). For square matrices n is almost always m.
4052 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4053 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4054 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4055            (same value is used for all local rows)
4056 .  d_nnz - array containing the number of nonzeros in the various rows of the
4057            DIAGONAL portion of the local submatrix (possibly different for each row)
4058            or NULL, if d_nz is used to specify the nonzero structure.
4059            The size of this array is equal to the number of local rows, i.e. 'm'.
4060 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4061            submatrix (same value is used for all local rows).
4062 -  o_nnz - array containing the number of nonzeros in the various rows of the
4063            OFF-DIAGONAL portion of the local submatrix (possibly different for
4064            each row) or NULL, if o_nz is used to specify the nonzero
4065            structure. The size of this array is equal to the number
4066            of local rows, i.e. 'm'.
4067 
4068    Output Parameter:
4069 .  A - the matrix
4070 
4071    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4072    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4073    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4074 
4075    Notes:
4076    If the *_nnz parameter is given then the *_nz parameter is ignored
4077 
4078    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4079    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4080    storage requirements for this matrix.
4081 
4082    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4083    processor then it must be used on all processors that share the object for
4084    that argument.
4085 
4086    The user MUST specify either the local or global matrix dimensions
4087    (possibly both).
4088 
4089    The parallel matrix is partitioned across processors such that the
4090    first m0 rows belong to process 0, the next m1 rows belong to
4091    process 1, the next m2 rows belong to process 2, etc., where
4092    m0,m1,m2,.. are the input parameter 'm', i.e. each processor stores
4093    values corresponding to an [m x N] submatrix.
4094 
4095    The columns are logically partitioned with the n0 columns belonging
4096    to 0th partition, the next n1 columns belonging to the next
4097    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4098 
4099    The DIAGONAL portion of the local submatrix on any given processor
4100    is the submatrix corresponding to the rows and columns m,n
4101    corresponding to the given processor, i.e. the diagonal submatrix on
4102    process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4103    etc. The remaining portion of the local submatrix [m x (N-n)]
4104    constitutes the OFF-DIAGONAL portion. The example below better
4105    illustrates this concept.
4106 
4107    For a square global matrix we define each processor's diagonal portion
4108    to be its local rows and the corresponding columns (a square submatrix);
4109    each processor's off-diagonal portion encompasses the remainder of the
4110    local matrix (a rectangular submatrix).
4111 
4112    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4113 
4114    When calling this routine with a single process communicator, a matrix of
4115    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4116    type of communicator, use the construction mechanism:
4117      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4118 
4119    By default, this format uses inodes (identical nodes) when possible.
4120    We search for consecutive rows with the same nonzero structure, thereby
4121    reusing matrix information to achieve increased efficiency.
4122 
4123    Options Database Keys:
4124 +  -mat_no_inode  - Do not use inodes
4125 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4126 -  -mat_aij_oneindex - Internally use indexing starting at 1
4127         rather than 0.  Note that when calling MatSetValues(),
4128         the user still MUST index entries starting at 0!
4129 
4130 
4131    Example usage:
4132 
4133    Consider the following 8x8 matrix with 34 non-zero values that is
4134    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4135    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4136    as follows:
4137 
4138 .vb
4139             1  2  0  |  0  3  0  |  0  4
4140     Proc0   0  5  6  |  7  0  0  |  8  0
4141             9  0 10  | 11  0  0  | 12  0
4142     -------------------------------------
4143            13  0 14  | 15 16 17  |  0  0
4144     Proc1   0 18  0  | 19 20 21  |  0  0
4145             0  0  0  | 22 23  0  | 24  0
4146     -------------------------------------
4147     Proc2  25 26 27  |  0  0 28  | 29  0
4148            30  0  0  | 31 32 33  |  0 34
4149 .ve
4150 
4151    This can be represented as a collection of submatrices as:
4152 
4153 .vb
4154       A B C
4155       D E F
4156       G H I
4157 .ve
4158 
4159    Where the submatrices A,B,C are owned by proc0, D,E,F are
4160    owned by proc1, G,H,I are owned by proc2.
4161 
4162    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4163    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4164    The 'M','N' parameters are 8,8, and have the same values on all procs.
4165 
4166    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4167    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4168    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4169    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4170    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4171    matrix, and [DF] as another SeqAIJ matrix.
4172 
4173    When d_nz, o_nz parameters are specified, d_nz storage elements are
4174    allocated for every row of the local diagonal submatrix, and o_nz
4175    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4176    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local
4177    row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4178    In this case, the values of d_nz,o_nz are:
4179 .vb
4180      proc0 : dnz = 2, o_nz = 2
4181      proc1 : dnz = 3, o_nz = 2
4182      proc2 : dnz = 1, o_nz = 4
4183 .ve
4184    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4185    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4186    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4187    34 values.
4188 
4189    When d_nnz, o_nnz parameters are specified, the storage is specified
4190    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4191    In the above case the values for d_nnz,o_nnz are:
4192 .vb
4193      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4194      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4195      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4196 .ve
4197    Here the space allocated is the sum of all the above values, i.e. 34, and
4198    hence pre-allocation is perfect.
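
   Using the example above, the corresponding call on proc0 could look as follows
   (a sketch; the d_nnz/o_nnz arrays hold proc0's per-row counts from the table):

.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve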
4199 
4200    Level: intermediate
4201 
4202 .keywords: matrix, aij, compressed row, sparse, parallel
4203 
4204 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4205           MPIAIJ, MatCreateMPIAIJWithArrays()
4206 @*/
4207 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4208 {
4209   PetscErrorCode ierr;
4210   PetscMPIInt    size;
4211 
4212   PetscFunctionBegin;
4213   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4214   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4216   if (size > 1) {
4217     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4218     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4219   } else {
4220     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4221     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4222   }
4223   PetscFunctionReturn(0);
4224 }
4225 
4226 #undef __FUNCT__
4227 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
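/*
    MatMPIAIJGetSeqAIJ - returns the diagonal (Ad) and off-diagonal (Ao) SeqAIJ blocks of an
    MPIAIJ matrix, together with the mapping (colmap) from Ao's local column numbering to
    global column numbers.  Any output pointer may be NULL if that piece is not needed.
*/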
4228 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4229 {
4230   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4231 
4232   PetscFunctionBegin;
4233   if (Ad)     *Ad     = a->A;
4234   if (Ao)     *Ao     = a->B;
4235   if (colmap) *colmap = a->garray;
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 #undef __FUNCT__
4240 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4241 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4242 {
4243   PetscErrorCode ierr;
4244   PetscInt       i;
4245   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4246 
4247   PetscFunctionBegin;
4248   if (coloring->ctype == IS_COLORING_GLOBAL) {
4249     ISColoringValue *allcolors,*colors;
4250     ISColoring      ocoloring;
4251 
4252     /* set coloring for diagonal portion */
4253     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4254 
4255     /* set coloring for off-diagonal portion */
4256     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4257     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4258     for (i=0; i<a->B->cmap->n; i++) {
4259       colors[i] = allcolors[a->garray[i]];
4260     }
4261     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4262     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4263     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4264     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4265   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4266     ISColoringValue *colors;
4267     PetscInt        *larray;
4268     ISColoring      ocoloring;
4269 
4270     /* set coloring for diagonal portion */
4271     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4272     for (i=0; i<a->A->cmap->n; i++) {
4273       larray[i] = i + A->cmap->rstart;
4274     }
4275     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4276     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4277     for (i=0; i<a->A->cmap->n; i++) {
4278       colors[i] = coloring->colors[larray[i]];
4279     }
4280     ierr = PetscFree(larray);CHKERRQ(ierr);
4281     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4282     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4283     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4284 
4285     /* set coloring for off-diagonal portion */
4286     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4287     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4288     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4289     for (i=0; i<a->B->cmap->n; i++) {
4290       colors[i] = coloring->colors[larray[i]];
4291     }
4292     ierr = PetscFree(larray);CHKERRQ(ierr);
4293     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4294     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4295     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4296   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4297   PetscFunctionReturn(0);
4298 }
4299 
4300 #undef __FUNCT__
4301 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4302 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4303 {
4304   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4305   PetscErrorCode ierr;
4306 
4307   PetscFunctionBegin;
4308   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4309   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4310   PetscFunctionReturn(0);
4311 }
4312 
4313 #undef __FUNCT__
4314 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4315 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4316 {
4317   PetscErrorCode ierr;
4318   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4319   PetscInt       *indx;
4320 
4321   PetscFunctionBegin;
4322   /* This routine will ONLY return MPIAIJ type matrix */
4323   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4324   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4325   if (n == PETSC_DECIDE) {
4326     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4327   }
4328   /* Check sum(n) = N */
4329   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4330   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4331 
4332   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4333   rstart -= m;
4334 
4335   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4336   for (i=0; i<m; i++) {
4337     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4338     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4339     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4340   }
4341 
4342   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4343   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4344   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4345   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4346   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4347   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4348   PetscFunctionReturn(0);
4349 }
4350 
4351 #undef __FUNCT__
4352 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4353 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4354 {
4355   PetscErrorCode ierr;
4356   PetscInt       m,N,i,rstart,nnz,Ii;
4357   PetscInt       *indx;
4358   PetscScalar    *values;
4359 
4360   PetscFunctionBegin;
4361   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4362   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4363   for (i=0; i<m; i++) {
4364     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4365     Ii   = i + rstart;
4366     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4367     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4368   }
4369   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4370   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4371   PetscFunctionReturn(0);
4372 }
4373 
4374 #undef __FUNCT__
4375 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4376 /*@
4377       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4378                  matrices from each processor
4379 
4380     Collective on MPI_Comm
4381 
4382    Input Parameters:
4383 +    comm - the communicator the parallel matrix will live on
4384 .    inmat - the input sequential matrix (one per process)
4385 .    n - number of local columns (or PETSC_DECIDE)
4386 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4387 
4388    Output Parameter:
4389 .    outmat - the parallel matrix generated
4390 
4391     Level: advanced
4392 
4393    Notes: The number of columns of the matrix in EACH processor MUST be the same.
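
   A usage sketch (inmat is a SeqAIJ matrix that already exists on each process;
   the names are illustrative):

.vb
     Mat C;
     ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,inmat,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve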
4394 
4395 @*/
4396 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4397 {
4398   PetscErrorCode ierr;
4399   PetscMPIInt    size;
4400 
4401   PetscFunctionBegin;
4402   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4403   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4404   if (size == 1) {
4405     if (scall == MAT_INITIAL_MATRIX) {
4406       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4407     } else {
4408       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4409     }
4410   } else {
4411     if (scall == MAT_INITIAL_MATRIX) {
4412       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4413     }
4414     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4415   }
4416   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4417   PetscFunctionReturn(0);
4418 }
4419 
4420 #undef __FUNCT__
4421 #define __FUNCT__ "MatFileSplit"
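/*
    MatFileSplit - writes each process's local rows of A, stored as an m x N SeqAIJ matrix,
    to a separate binary file named "<outfile>.<rank>".
*/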
4422 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4423 {
4424   PetscErrorCode    ierr;
4425   PetscMPIInt       rank;
4426   PetscInt          m,N,i,rstart,nnz;
4427   size_t            len;
4428   const PetscInt    *indx;
4429   PetscViewer       out;
4430   char              *name;
4431   Mat               B;
4432   const PetscScalar *values;
4433 
4434   PetscFunctionBegin;
4435   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4436   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4437   /* Should this be the type of the diagonal block of A? */
4438   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4439   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4440   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4441   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4442   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4443   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4444   for (i=0; i<m; i++) {
4445     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4446     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4447     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4448   }
4449   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4450   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4451 
4452   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4453   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4454   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4455   sprintf(name,"%s.%d",outfile,rank);
4456   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4457   ierr = PetscFree(name);CHKERRQ(ierr);
4458   ierr = MatView(B,out);CHKERRQ(ierr);
4459   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4460   ierr = MatDestroy(&B);CHKERRQ(ierr);
4461   PetscFunctionReturn(0);
4462 }
4463 
4464 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4465 #undef __FUNCT__
4466 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4467 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4468 {
4469   PetscErrorCode      ierr;
4470   Mat_Merge_SeqsToMPI *merge;
4471   PetscContainer      container;
4472 
4473   PetscFunctionBegin;
4474   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4475   if (container) {
4476     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4477     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4478     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4479     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4480     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4481     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4482     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4483     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4484     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4485     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4486     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4487     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4488     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4489     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4490     ierr = PetscFree(merge);CHKERRQ(ierr);
4491     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4492   }
4493   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4494   PetscFunctionReturn(0);
4495 }
4496 
4497 #include <../src/mat/utils/freespace.h>
4498 #include <petscbt.h>
4499 
4500 #undef __FUNCT__
4501 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4502 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4503 {
4504   PetscErrorCode      ierr;
4505   MPI_Comm            comm;
4506   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4507   PetscMPIInt         size,rank,taga,*len_s;
4508   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4509   PetscInt            proc,m;
4510   PetscInt            **buf_ri,**buf_rj;
4511   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4512   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4513   MPI_Request         *s_waits,*r_waits;
4514   MPI_Status          *status;
4515   MatScalar           *aa=a->a;
4516   MatScalar           **abuf_r,*ba_i;
4517   Mat_Merge_SeqsToMPI *merge;
4518   PetscContainer      container;
4519 
4520   PetscFunctionBegin;
4521   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4522   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4523 
4524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4525   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4526 
4527   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4528   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4529 
4530   bi     = merge->bi;
4531   bj     = merge->bj;
4532   buf_ri = merge->buf_ri;
4533   buf_rj = merge->buf_rj;
4534 
4535   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4536   owners = merge->rowmap->range;
4537   len_s  = merge->len_s;
4538 
4539   /* send and recv matrix values */
4540   /*-----------------------------*/
4541   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4542   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4543 
4544   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4545   for (proc=0,k=0; proc<size; proc++) {
4546     if (!len_s[proc]) continue;
4547     i    = owners[proc];
4548     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4549     k++;
4550   }
4551 
4552   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4553   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4554   ierr = PetscFree(status);CHKERRQ(ierr);
4555 
4556   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4557   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4558 
4559   /* insert mat values of mpimat */
4560   /*----------------------------*/
4561   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4562   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4563 
4564   for (k=0; k<merge->nrecv; k++) {
4565     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4566     nrows       = *(buf_ri_k[k]);
4567     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4568     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4569   }
4570 
4571   /* set values of ba */
4572   m = merge->rowmap->n;
4573   for (i=0; i<m; i++) {
4574     arow = owners[rank] + i;
4575     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4576     bnzi = bi[i+1] - bi[i];
4577     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4578 
4579     /* add local non-zero vals of this proc's seqmat into ba */
4580     anzi   = ai[arow+1] - ai[arow];
4581     aj     = a->j + ai[arow];
4582     aa     = a->a + ai[arow];
4583     nextaj = 0;
4584     for (j=0; nextaj<anzi; j++) {
4585       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4586         ba_i[j] += aa[nextaj++];
4587       }
4588     }
4589 
4590     /* add received vals into ba */
4591     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4592       /* i-th row */
4593       if (i == *nextrow[k]) {
4594         anzi   = *(nextai[k]+1) - *nextai[k];
4595         aj     = buf_rj[k] + *(nextai[k]);
4596         aa     = abuf_r[k] + *(nextai[k]);
4597         nextaj = 0;
4598         for (j=0; nextaj<anzi; j++) {
4599           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4600             ba_i[j] += aa[nextaj++];
4601           }
4602         }
4603         nextrow[k]++; nextai[k]++;
4604       }
4605     }
4606     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4607   }
4608   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4609   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4610 
4611   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4612   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4613   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4614   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4615   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4616   PetscFunctionReturn(0);
4617 }
4618 
4619 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4620 
4621 #undef __FUNCT__
4622 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4623 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4624 {
4625   PetscErrorCode      ierr;
4626   Mat                 B_mpi;
4627   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4628   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4629   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4630   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4631   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4632   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4633   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4634   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4635   MPI_Status          *status;
4636   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4637   PetscBT             lnkbt;
4638   Mat_Merge_SeqsToMPI *merge;
4639   PetscContainer      container;
4640 
4641   PetscFunctionBegin;
4642   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4643 
4644   /* make sure it is a PETSc comm */
4645   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4646   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4647   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4648 
4649   ierr = PetscNew(&merge);CHKERRQ(ierr);
4650   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4651 
4652   /* determine row ownership */
4653   /*---------------------------------------------------------*/
4654   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4655   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4656   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4657   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4658   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4659   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4660   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4661 
4662   m      = merge->rowmap->n;
4663   owners = merge->rowmap->range;
4664 
4665   /* determine the number of messages to send, their lengths */
4666   /*---------------------------------------------------------*/
4667   len_s = merge->len_s;
4668 
4669   len          = 0; /* length of buf_si[] */
4670   merge->nsend = 0;
4671   for (proc=0; proc<size; proc++) {
4672     len_si[proc] = 0;
4673     if (proc == rank) {
4674       len_s[proc] = 0;
4675     } else {
4676       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4677       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4678     }
4679     if (len_s[proc]) {
4680       merge->nsend++;
4681       nrows = 0;
4682       for (i=owners[proc]; i<owners[proc+1]; i++) {
4683         if (ai[i+1] > ai[i]) nrows++;
4684       }
4685       len_si[proc] = 2*(nrows+1);
4686       len         += len_si[proc];
4687     }
4688   }
4689 
4690   /* determine the number and length of messages to receive for ij-structure */
4691   /*-------------------------------------------------------------------------*/
4692   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4693   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4694 
4695   /* post the Irecv of j-structure */
4696   /*-------------------------------*/
4697   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4698   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4699 
4700   /* post the Isend of j-structure */
4701   /*--------------------------------*/
4702   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4703 
4704   for (proc=0, k=0; proc<size; proc++) {
4705     if (!len_s[proc]) continue;
4706     i    = owners[proc];
4707     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4708     k++;
4709   }
4710 
4711   /* receives and sends of j-structure are complete */
4712   /*------------------------------------------------*/
4713   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4714   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4715 
4716   /* send and recv i-structure */
4717   /*---------------------------*/
4718   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4719   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4720 
4721   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4722   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4723   for (proc=0,k=0; proc<size; proc++) {
4724     if (!len_s[proc]) continue;
4725     /* form outgoing message for i-structure:
4726          buf_si[0]:                 nrows to be sent
4727                [1:nrows]:           row index (global)
4728                [nrows+1:2*nrows+1]: i-structure index
4729     */
4730     /*-------------------------------------------*/
4731     nrows       = len_si[proc]/2 - 1;
4732     buf_si_i    = buf_si + nrows+1;
4733     buf_si[0]   = nrows;
4734     buf_si_i[0] = 0;
4735     nrows       = 0;
4736     for (i=owners[proc]; i<owners[proc+1]; i++) {
4737       anzi = ai[i+1] - ai[i];
4738       if (anzi) {
4739         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4740         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4741         nrows++;
4742       }
4743     }
4744     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4745     k++;
4746     buf_si += len_si[proc];
4747   }
4748 
4749   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4750   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4751 
4752   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4753   for (i=0; i<merge->nrecv; i++) {
4754     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4755   }
4756 
4757   ierr = PetscFree(len_si);CHKERRQ(ierr);
4758   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4759   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4760   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4761   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4762   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4763   ierr = PetscFree(status);CHKERRQ(ierr);
4764 
4765   /* compute a local seq matrix in each processor */
4766   /*----------------------------------------------*/
4767   /* allocate bi array and free space for accumulating nonzero column info */
4768   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4769   bi[0] = 0;
4770 
4771   /* create and initialize a linked list */
4772   nlnk = N+1;
4773   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4774 
4775   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4776   len  = ai[owners[rank+1]] - ai[owners[rank]];
4777   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4778 
4779   current_space = free_space;
4780 
4781   /* determine symbolic info for each local row */
4782   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4783 
4784   for (k=0; k<merge->nrecv; k++) {
4785     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4786     nrows       = *buf_ri_k[k];
4787     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4788     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure  */
4789   }
4790 
4791   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4792   len  = 0;
4793   for (i=0; i<m; i++) {
4794     bnzi = 0;
4795     /* add local non-zero cols of this proc's seqmat into lnk */
4796     arow  = owners[rank] + i;
4797     anzi  = ai[arow+1] - ai[arow];
4798     aj    = a->j + ai[arow];
4799     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4800     bnzi += nlnk;
4801     /* add received col data into lnk */
4802     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4803       if (i == *nextrow[k]) { /* i-th row */
4804         anzi  = *(nextai[k]+1) - *nextai[k];
4805         aj    = buf_rj[k] + *nextai[k];
4806         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4807         bnzi += nlnk;
4808         nextrow[k]++; nextai[k]++;
4809       }
4810     }
4811     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4812 
4813     /* if free space is not available, make more free space */
4814     if (current_space->local_remaining<bnzi) {
4815       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4816       nspacedouble++;
4817     }
4818     /* copy data into free space, then initialize lnk */
4819     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4820     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4821 
4822     current_space->array           += bnzi;
4823     current_space->local_used      += bnzi;
4824     current_space->local_remaining -= bnzi;
4825 
4826     bi[i+1] = bi[i] + bnzi;
4827   }
4828 
4829   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4830 
4831   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4832   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4833   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4834 
4835   /* create symbolic parallel matrix B_mpi */
4836   /*---------------------------------------*/
4837   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4838   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4839   if (n==PETSC_DECIDE) {
4840     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4841   } else {
4842     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4843   }
4844   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4845   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4846   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4847   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4848   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4849 
4850   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4851   B_mpi->assembled    = PETSC_FALSE;
4852   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4853   merge->bi           = bi;
4854   merge->bj           = bj;
4855   merge->buf_ri       = buf_ri;
4856   merge->buf_rj       = buf_rj;
4857   merge->coi          = NULL;
4858   merge->coj          = NULL;
4859   merge->owners_co    = NULL;
4860 
4861   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4862 
4863   /* attach the supporting struct to B_mpi for reuse */
4864   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4865   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4866   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4867   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4868   *mpimat = B_mpi;
4869 
4870   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4871   PetscFunctionReturn(0);
4872 }
4873 
4874 #undef __FUNCT__
4875 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4876 /*@C
4877       MatCreateMPIAIJSumSeqAIJ - Creates a MPIAIJ matrix by adding sequential
4878                  matrices from each processor
4879 
4880     Collective on MPI_Comm
4881 
4882    Input Parameters:
4883 +    comm - the communicator the parallel matrix will live on
4884 .    seqmat - the input sequential matrix (one per process)
4885 .    m - number of local rows (or PETSC_DECIDE)
4886 .    n - number of local columns (or PETSC_DECIDE)
4887 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4888 
4889    Output Parameter:
4890 .    mpimat - the parallel matrix generated
4891 
4892     Level: advanced
4893 
4894    Notes:
4895      The dimensions of the sequential matrix in each processor MUST be the same.
4896      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4897      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
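
   A usage sketch (seqmat is a SeqAIJ matrix with the same dimensions on every process;
   the names are illustrative):

.vb
     Mat C;
     ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve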
4898 @*/
4899 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4900 {
4901   PetscErrorCode ierr;
4902   PetscMPIInt    size;
4903 
4904   PetscFunctionBegin;
4905   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4906   if (size == 1) {
4907     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4908     if (scall == MAT_INITIAL_MATRIX) {
4909       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4910     } else {
4911       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4912     }
4913     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4914     PetscFunctionReturn(0);
4915   }
4916   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4917   if (scall == MAT_INITIAL_MATRIX) {
4918     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4919   }
4920   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4921   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4922   PetscFunctionReturn(0);
4923 }
4924 
4925 #undef __FUNCT__
4926 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4927 /*@
4928      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4929           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4930           with MatGetSize()
4931 
4932     Not Collective
4933 
4934    Input Parameters:
4935 +    A - the matrix
4936 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4937 
4938    Output Parameter:
4939 .    A_loc - the local sequential matrix generated
4940 
4941     Level: developer
4942 
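   A usage sketch (A is an assembled MPIAIJ matrix; the names are illustrative):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     /* ... use A_loc ... */
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve
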
4943 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4944 
4945 @*/
4946 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4947 {
4948   PetscErrorCode ierr;
4949   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4950   Mat_SeqAIJ     *mat,*a,*b;
4951   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4952   MatScalar      *aa,*ba,*cam;
4953   PetscScalar    *ca;
4954   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4955   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4956   PetscBool      match;
4957   MPI_Comm       comm;
4958   PetscMPIInt    size;
4959 
4960   PetscFunctionBegin;
4961   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4962   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4963   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
4964   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4965   if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);
4966 
4967   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4968   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4969   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4970   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4971   aa = a->a; ba = b->a;
4972   if (scall == MAT_INITIAL_MATRIX) {
4973     if (size == 1) {
4974       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
4975       PetscFunctionReturn(0);
4976     }
4977 
4978     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4979     ci[0] = 0;
4980     for (i=0; i<am; i++) {
4981       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4982     }
4983     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4984     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4985     k    = 0;
4986     for (i=0; i<am; i++) {
4987       ncols_o = bi[i+1] - bi[i];
4988       ncols_d = ai[i+1] - ai[i];
4989       /* off-diagonal portion of A */
4990       for (jo=0; jo<ncols_o; jo++) {
4991         col = cmap[*bj];
4992         if (col >= cstart) break;
4993         cj[k]   = col; bj++;
4994         ca[k++] = *ba++;
4995       }
4996       /* diagonal portion of A */
4997       for (j=0; j<ncols_d; j++) {
4998         cj[k]   = cstart + *aj++;
4999         ca[k++] = *aa++;
5000       }
5001       /* off-diagonal portion of A */
5002       for (j=jo; j<ncols_o; j++) {
5003         cj[k]   = cmap[*bj++];
5004         ca[k++] = *ba++;
5005       }
5006     }
5007     /* put together the new matrix */
5008     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5009     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5010     /* Since these are PETSc arrays, change flags to free them as necessary. */
5011     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5012     mat->free_a  = PETSC_TRUE;
5013     mat->free_ij = PETSC_TRUE;
5014     mat->nonew   = 0;
5015   } else if (scall == MAT_REUSE_MATRIX) {
5016     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5017     ci = mat->i; cj = mat->j; cam = mat->a;
5018     for (i=0; i<am; i++) {
5019       /* off-diagonal portion of A */
5020       ncols_o = bi[i+1] - bi[i];
5021       for (jo=0; jo<ncols_o; jo++) {
5022         col = cmap[*bj];
5023         if (col >= cstart) break;
5024         *cam++ = *ba++; bj++;
5025       }
5026       /* diagonal portion of A */
5027       ncols_d = ai[i+1] - ai[i];
5028       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5029       /* off-diagonal portion of A */
5030       for (j=jo; j<ncols_o; j++) {
5031         *cam++ = *ba++; bj++;
5032       }
5033     }
5034   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5035   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5036   PetscFunctionReturn(0);
5037 }
5038 
5039 #undef __FUNCT__
5040 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5041 /*@C
5042      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5043 
5044     Not Collective
5045 
5046    Input Parameters:
5047 +    A - the matrix
5048 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5049 -    row, col - index sets of rows and columns to extract (or NULL)
5050 
5051    Output Parameter:
5052 .    A_loc - the local sequential matrix generated
5053 
5054     Level: developer
5055 
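   A usage sketch that extracts all local rows and all nonzero columns (row and col
   passed as NULL; the names are illustrative):

.vb
     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
.ve
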
5056 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5057 
5058 @*/
5059 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5060 {
5061   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5062   PetscErrorCode ierr;
5063   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5064   IS             isrowa,iscola;
5065   Mat            *aloc;
5066   PetscBool      match;
5067 
5068   PetscFunctionBegin;
5069   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5070   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5071   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5072   if (!row) {
5073     start = A->rmap->rstart; end = A->rmap->rend;
5074     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5075   } else {
5076     isrowa = *row;
5077   }
5078   if (!col) {
5079     start = A->cmap->rstart;
5080     cmap  = a->garray;
5081     nzA   = a->A->cmap->n;
5082     nzB   = a->B->cmap->n;
5083     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5084     ncols = 0;
5085     for (i=0; i<nzB; i++) {
5086       if (cmap[i] < start) idx[ncols++] = cmap[i];
5087       else break;
5088     }
5089     imark = i;
5090     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5091     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5092     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5093   } else {
5094     iscola = *col;
5095   }
5096   if (scall != MAT_INITIAL_MATRIX) {
5097     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5098     aloc[0] = *A_loc;
5099   }
5100   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5101   *A_loc = aloc[0];
5102   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5103   if (!row) {
5104     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5105   }
5106   if (!col) {
5107     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5108   }
5109   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5110   PetscFunctionReturn(0);
5111 }
5112 
5113 #undef __FUNCT__
5114 #define __FUNCT__ "MatGetBrowsOfAcols"
5115 /*@C
5116     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5117 
5118     Collective on Mat
5119 
5120    Input Parameters:
5121 +    A,B - the matrices in mpiaij format
5122 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5123 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5124 
5125    Output Parameter:
5126 +    rowb, colb - index sets of rows and columns of B to extract
5127 -    B_seq - the sequential matrix generated
5128 
5129     Level: developer
5130 
5131 @*/
5132 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5133 {
5134   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5135   PetscErrorCode ierr;
5136   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5137   IS             isrowb,iscolb;
5138   Mat            *bseq=NULL;
5139 
5140   PetscFunctionBegin;
5141   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5142     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5143   }
5144   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5145 
5146   if (scall == MAT_INITIAL_MATRIX) {
5147     start = A->cmap->rstart;
5148     cmap  = a->garray;
5149     nzA   = a->A->cmap->n;
5150     nzB   = a->B->cmap->n;
5151     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5152     ncols = 0;
5153     for (i=0; i<nzB; i++) {  /* row < local row index */
5154       if (cmap[i] < start) idx[ncols++] = cmap[i];
5155       else break;
5156     }
5157     imark = i;
5158     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5159     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5160     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5161     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5162   } else {
5163     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5164     isrowb  = *rowb; iscolb = *colb;
5165     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5166     bseq[0] = *B_seq;
5167   }
5168   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5169   *B_seq = bseq[0];
5170   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5171   if (!rowb) {
5172     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5173   } else {
5174     *rowb = isrowb;
5175   }
5176   if (!colb) {
5177     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5178   } else {
5179     *colb = iscolb;
5180   }
5181   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5182   PetscFunctionReturn(0);
5183 }
5184 
5185 #undef __FUNCT__
5186 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5187 /*
5188     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5189     of the OFF-DIAGONAL portion of local A
5190 
5191     Collective on Mat
5192 
5193    Input Parameters:
5194 +    A,B - the matrices in mpiaij format
5195 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5196 
5197    Output Parameter:
5198 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5199 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5200 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5201 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5202 
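    Usage:
    A sketch of the intended call pattern; the saved startsj/bufa arrays let the communication
    pattern be reused, and are assumed to be freed by the caller (PetscFree2()/PetscFree()) when done.

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... values of B change, its nonzero pattern does not ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
      ierr = PetscFree(bufa);CHKERRQ(ierr);
      ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
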
5203     Level: developer
5204 
5205 */
5206 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5207 {
5208   VecScatter_MPI_General *gen_to,*gen_from;
5209   PetscErrorCode         ierr;
5210   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5211   Mat_SeqAIJ             *b_oth;
5212   VecScatter             ctx =a->Mvctx;
5213   MPI_Comm               comm;
5214   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5215   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5216   PetscScalar            *rvalues,*svalues;
5217   MatScalar              *b_otha,*bufa,*bufA;
5218   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5219   MPI_Request            *rwaits = NULL,*swaits = NULL;
5220   MPI_Status             *sstatus,rstatus;
5221   PetscMPIInt            jj,size;
5222   PetscInt               *cols,sbs,rbs;
5223   PetscScalar            *vals;
5224 
5225   PetscFunctionBegin;
5226   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5227   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5228   if (size == 1) PetscFunctionReturn(0);
5229 
5230   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5231     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5232   }
5233   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5234   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5235 
5236   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5237   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5238   rvalues  = gen_from->values; /* holds the length of receiving row */
5239   svalues  = gen_to->values;   /* holds the length of sending row */
5240   nrecvs   = gen_from->n;
5241   nsends   = gen_to->n;
5242 
5243   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5244   srow    = gen_to->indices;    /* local row index to be sent */
5245   sstarts = gen_to->starts;
5246   sprocs  = gen_to->procs;
5247   sstatus = gen_to->sstatus;
5248   sbs     = gen_to->bs;
5249   rstarts = gen_from->starts;
5250   rprocs  = gen_from->procs;
5251   rbs     = gen_from->bs;
5252 
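  /* Without caller-provided locations to save the start offsets and the send buffer the
     communication pattern cannot be reused, so fall back to a full (re)build of B_oth */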
5253   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5254   if (scall == MAT_INITIAL_MATRIX) {
5255     /* i-array */
5256     /*---------*/
5257     /*  post receives */
5258     for (i=0; i<nrecvs; i++) {
5259       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5260       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5261       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5262     }
5263 
5264     /* pack the outgoing message */
5265     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5266 
5267     sstartsj[0] = 0;
5268     rstartsj[0] = 0;
5269     len         = 0; /* total length of j or a array to be sent */
5270     k           = 0;
5271     for (i=0; i<nsends; i++) {
5272       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5273       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5274       for (j=0; j<nrows; j++) {
5275         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5276         for (l=0; l<sbs; l++) {
5277           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5278 
5279           rowlen[j*sbs+l] = ncols;
5280 
5281           len += ncols;
5282           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5283         }
5284         k++;
5285       }
5286       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5287 
5288       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5289     }
5290     /* recvs and sends of i-array are completed */
5291     i = nrecvs;
5292     while (i--) {
5293       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5294     }
5295     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5296 
5297     /* allocate buffers for sending j and a arrays */
5298     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5299     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5300 
5301     /* create i-array of B_oth */
5302     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5303 
5304     b_othi[0] = 0;
5305     len       = 0; /* total length of j or a array to be received */
5306     k         = 0;
5307     for (i=0; i<nrecvs; i++) {
5308       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5309       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5310       for (j=0; j<nrows; j++) {
5311         b_othi[k+1] = b_othi[k] + rowlen[j];
5312         len        += rowlen[j]; k++;
5313       }
5314       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5315     }
5316 
5317     /* allocate space for j and a arrays of B_oth */
5318     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5319     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5320 
5321     /* j-array */
5322     /*---------*/
5323     /*  post receives of j-array */
5324     for (i=0; i<nrecvs; i++) {
5325       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5326       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5327     }
5328 
5329     /* pack the outgoing message j-array */
5330     k = 0;
5331     for (i=0; i<nsends; i++) {
5332       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5333       bufJ  = bufj+sstartsj[i];
5334       for (j=0; j<nrows; j++) {
5335         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5336         for (ll=0; ll<sbs; ll++) {
5337           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5338           for (l=0; l<ncols; l++) {
5339             *bufJ++ = cols[l];
5340           }
5341           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5342         }
5343       }
5344       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5345     }
5346 
5347     /* recvs and sends of j-array are completed */
5348     i = nrecvs;
5349     while (i--) {
5350       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5351     }
5352     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5353   } else if (scall == MAT_REUSE_MATRIX) {
5354     sstartsj = *startsj_s;
5355     rstartsj = *startsj_r;
5356     bufa     = *bufa_ptr;
5357     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5358     b_otha   = b_oth->a;
5359   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Only MAT_INITIAL_MATRIX and MAT_REUSE_MATRIX are supported");
5360 
5361   /* a-array */
5362   /*---------*/
5363   /*  post receives of a-array */
5364   for (i=0; i<nrecvs; i++) {
5365     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5366     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5367   }
5368 
5369   /* pack the outgoing message a-array */
5370   k = 0;
5371   for (i=0; i<nsends; i++) {
5372     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5373     bufA  = bufa+sstartsj[i];
5374     for (j=0; j<nrows; j++) {
5375       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5376       for (ll=0; ll<sbs; ll++) {
5377         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5378         for (l=0; l<ncols; l++) {
5379           *bufA++ = vals[l];
5380         }
5381         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5382       }
5383     }
5384     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5385   }
5386   /* recvs and sends of a-array are completed */
5387   i = nrecvs;
5388   while (i--) {
5389     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5390   }
5391   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5392   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5393 
5394   if (scall == MAT_INITIAL_MATRIX) {
5395     /* put together the new matrix */
5396     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5397 
5398     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5399     /* Since these are PETSc arrays, change flags to free them as necessary. */
5400     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5401     b_oth->free_a  = PETSC_TRUE;
5402     b_oth->free_ij = PETSC_TRUE;
5403     b_oth->nonew   = 0;
5404 
5405     ierr = PetscFree(bufj);CHKERRQ(ierr);
5406     if (!startsj_s || !bufa_ptr) {
5407       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5408       ierr = PetscFree(bufa);CHKERRQ(ierr);
5409     } else {
5410       *startsj_s = sstartsj;
5411       *startsj_r = rstartsj;
5412       *bufa_ptr  = bufa;
5413     }
5414   }
5415   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5416   PetscFunctionReturn(0);
5417 }
5418 
5419 #undef __FUNCT__
5420 #define __FUNCT__ "MatGetCommunicationStructs"
5421 /*@C
5422   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5423 
5424   Not Collective
5425 
5426   Input Parameters:
5427 . A - The matrix in mpiaij format
5428 
5429   Output Parameter:
5430 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5431 . colmap - A map from global column index to local index into lvec
5432 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5433 
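  Example Usage:
  A minimal sketch; the returned objects are references to the matrix's internal data structures
  and are assumed not to be destroyed by the caller.
.vb
    Vec        lvec;
    VecScatter Mvctx;
#if defined(PETSC_USE_CTABLE)
    PetscTable colmap;
#else
    PetscInt   *colmap;
#endif
    ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
.ve
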
5434   Level: developer
5435 
5436 @*/
5437 #if defined(PETSC_USE_CTABLE)
5438 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5439 #else
5440 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5441 #endif
5442 {
5443   Mat_MPIAIJ *a;
5444 
5445   PetscFunctionBegin;
5446   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5447   PetscValidPointer(lvec, 2);
5448   PetscValidPointer(colmap, 3);
5449   PetscValidPointer(multScatter, 4);
5450   a = (Mat_MPIAIJ*) A->data;
5451   if (lvec) *lvec = a->lvec;
5452   if (colmap) *colmap = a->colmap;
5453   if (multScatter) *multScatter = a->Mvctx;
5454   PetscFunctionReturn(0);
5455 }
5456 
5457 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5458 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5459 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5460 #if defined(PETSC_HAVE_ELEMENTAL)
5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5462 #endif
5463 
5464 #undef __FUNCT__
5465 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5466 /*
5467     Computes C = A*B as (B'*A')', since forming the MPIDense*MPIAIJ product directly is untenable
5468 
5469                n                       p                          p
5470         (              )       (              )         (                  )
5471       m (      A       )  *  n (       B      )   =   m (         C        )
5472         (              )       (              )         (                  )
5473 
5474 */
5475 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5476 {
5477   PetscErrorCode ierr;
5478   Mat            At,Bt,Ct;
5479 
5480   PetscFunctionBegin;
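  /* Form At = A^T (dense) and Bt = B^T (AIJ), multiply in the supported AIJ*Dense order,
     and transpose the product back into the already created C */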
5481   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5482   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5483   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5484   ierr = MatDestroy(&At);CHKERRQ(ierr);
5485   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5486   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5487   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5488   PetscFunctionReturn(0);
5489 }
5490 
5491 #undef __FUNCT__
5492 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5493 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5494 {
5495   PetscErrorCode ierr;
5496   PetscInt       m=A->rmap->n,n=B->cmap->n;
5497   Mat            Cmat;
5498 
5499   PetscFunctionBegin;
5500   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
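  /* The product is dense: C gets A's row layout and B's column layout, is created as MPIDENSE,
     and the specialized numeric multiply is hooked in below */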
5501   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5502   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5503   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5504   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5505   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5506   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5507   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508 
5509   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5510 
5511   *C = Cmat;
5512   PetscFunctionReturn(0);
5513 }
5514 
5515 /* ----------------------------------------------------------------*/
5516 #undef __FUNCT__
5517 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5518 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5519 {
5520   PetscErrorCode ierr;
5521 
5522   PetscFunctionBegin;
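  /* MAT_INITIAL_MATRIX: build C symbolically first; otherwise reuse *C and do only the numeric phase */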
5523   if (scall == MAT_INITIAL_MATRIX) {
5524     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5525     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5526     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5527   }
5528   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5529   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5530   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5531   PetscFunctionReturn(0);
5532 }
5533 
5534 #if defined(PETSC_HAVE_MUMPS)
5535 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5536 #endif
5537 #if defined(PETSC_HAVE_PASTIX)
5538 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5539 #endif
5540 #if defined(PETSC_HAVE_SUPERLU_DIST)
5541 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5542 #endif
5543 #if defined(PETSC_HAVE_CLIQUE)
5544 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5545 #endif
5546 
5547 /*MC
5548    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5549 
5550    Options Database Keys:
5551 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5552 
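   Example Usage:
   A minimal sketch of the typical creation sequence; d_nz and o_nz stand for user-chosen estimates
   of the nonzeros per row in the diagonal and off-diagonal blocks.
.vb
     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
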
5553   Level: beginner
5554 
5555 .seealso: MatCreateAIJ()
5556 M*/
5557 
5558 #undef __FUNCT__
5559 #define __FUNCT__ "MatCreate_MPIAIJ"
5560 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5561 {
5562   Mat_MPIAIJ     *b;
5563   PetscErrorCode ierr;
5564   PetscMPIInt    size;
5565 
5566   PetscFunctionBegin;
5567   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5568 
5569   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5570   B->data       = (void*)b;
5571   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5572   B->assembled  = PETSC_FALSE;
5573   B->insertmode = NOT_SET_VALUES;
5574   b->size       = size;
5575 
5576   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5577 
5578   /* build cache for off array entries formed */
5579   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5580 
5581   b->donotstash  = PETSC_FALSE;
5582   b->colmap      = 0;
5583   b->garray      = 0;
5584   b->roworiented = PETSC_TRUE;
5585 
5586   /* stuff used for matrix vector multiply */
5587   b->lvec  = NULL;
5588   b->Mvctx = NULL;
5589 
5590   /* stuff for MatGetRow() */
5591   b->rowindices   = 0;
5592   b->rowvalues    = 0;
5593   b->getrowactive = PETSC_FALSE;
5594 
5595   /* flexible pointer used in CUSP/CUSPARSE classes */
5596   b->spptr = NULL;
5597 
5598 #if defined(PETSC_HAVE_MUMPS)
5599   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5600 #endif
5601 #if defined(PETSC_HAVE_PASTIX)
5602   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5603 #endif
5604 #if defined(PETSC_HAVE_SUPERLU_DIST)
5605   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5606 #endif
5607 #if defined(PETSC_HAVE_CLIQUE)
5608   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5609 #endif
5610   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5620 #if defined(PETSC_HAVE_ELEMENTAL)
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
5622 #endif
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5627   PetscFunctionReturn(0);
5628 }
5629 
5630 #undef __FUNCT__
5631 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5632 /*@C
5633      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5634          and "off-diagonal" parts of the matrix in CSR format.
5635 
5636    Collective on MPI_Comm
5637 
5638    Input Parameters:
5639 +  comm - MPI communicator
5640 .  m - number of local rows (Cannot be PETSC_DECIDE)
5641 .  n - This value should be the same as the local size used in creating the
5642        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5643        calculated if N is given). For square matrices n is almost always m.
5644 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5645 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5646 .   i - row indices for "diagonal" portion of matrix
5647 .   j - column indices
5648 .   a - matrix values
5649 .   oi - row indices for "off-diagonal" portion of matrix
5650 .   oj - column indices
5651 -   oa - matrix values
5652 
5653    Output Parameter:
5654 .   mat - the matrix
5655 
5656    Level: advanced
5657 
5658    Notes:
5659        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5660        must free the arrays once the matrix has been destroyed and not before.
5661 
5662        The i and j indices are 0 based
5663 
5664        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5665 
5666        This sets local rows and cannot be used to set off-processor values.
5667 
5668        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5669        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5670        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5671        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5672        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5673        communication if it is known that only local entries will be set.
5674 
5675 .keywords: matrix, aij, compressed row, sparse, parallel
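   Example Usage:
   A minimal sketch of the call pattern; i,j,a and oi,oj,oa are the caller's CSR arrays. Following the
   way the routine builds the two sequential blocks, j is assumed to hold 0-based local column indices
   of the "diagonal" block and oj 0-based global column indices of the "off-diagonal" block.
.vb
     Mat A;
     ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,M,N,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     /* ... use A; free the CSR arrays only after MatDestroy(&A) ... */
     ierr = MatDestroy(&A);CHKERRQ(ierr);
.ve
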
5676 
5677 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5678           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5679 @*/
5680 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5681 {
5682   PetscErrorCode ierr;
5683   Mat_MPIAIJ     *maij;
5684 
5685   PetscFunctionBegin;
5686   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5687   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5688   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5689   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5690   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5691   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5692   maij = (Mat_MPIAIJ*) (*mat)->data;
5693 
5694   (*mat)->preallocated = PETSC_TRUE;
5695 
5696   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5697   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5698 
5699   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5700   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5701 
5702   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5705   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706 
5707   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5708   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5709   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5710   PetscFunctionReturn(0);
5711 }
5712 
5713 /*
5714     Special version for direct calls from Fortran
5715 */
5716 #include <petsc-private/fortranimpl.h>
5717 
5718 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5719 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5721 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5722 #endif
5723 
5724 /* Change these macros so they can be used in a void function */
5725 #undef CHKERRQ
5726 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5727 #undef SETERRQ2
5728 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5729 #undef SETERRQ3
5730 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5731 #undef SETERRQ
5732 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5733 
5734 #undef __FUNCT__
5735 #define __FUNCT__ "matsetvaluesmpiaij_"
5736 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5737 {
5738   Mat            mat  = *mmat;
5739   PetscInt       m    = *mm, n = *mn;
5740   InsertMode     addv = *maddv;
5741   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5742   PetscScalar    value;
5743   PetscErrorCode ierr;
5744 
5745   MatCheckPreallocated(mat,1);
5746   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5747 
5748 #if defined(PETSC_USE_DEBUG)
5749   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5750 #endif
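  /* Values destined for locally owned rows go directly into the diagonal (A) or off-diagonal (B)
     sequential blocks; values for rows owned by other processes are stashed and exchanged during
     MatAssemblyBegin()/MatAssemblyEnd() */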
5751   {
5752     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5753     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5754     PetscBool roworiented = aij->roworiented;
5755 
5756     /* Some Variables required in the macro */
5757     Mat        A                 = aij->A;
5758     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5759     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5760     MatScalar  *aa               = a->a;
5761     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5762     Mat        B                 = aij->B;
5763     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5764     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5765     MatScalar  *ba               = b->a;
5766 
5767     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5768     PetscInt  nonew = a->nonew;
5769     MatScalar *ap1,*ap2;
5770 
5771     PetscFunctionBegin;
5772     for (i=0; i<m; i++) {
5773       if (im[i] < 0) continue;
5774 #if defined(PETSC_USE_DEBUG)
5775       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5776 #endif
5777       if (im[i] >= rstart && im[i] < rend) {
5778         row      = im[i] - rstart;
5779         lastcol1 = -1;
5780         rp1      = aj + ai[row];
5781         ap1      = aa + ai[row];
5782         rmax1    = aimax[row];
5783         nrow1    = ailen[row];
5784         low1     = 0;
5785         high1    = nrow1;
5786         lastcol2 = -1;
5787         rp2      = bj + bi[row];
5788         ap2      = ba + bi[row];
5789         rmax2    = bimax[row];
5790         nrow2    = bilen[row];
5791         low2     = 0;
5792         high2    = nrow2;
5793 
5794         for (j=0; j<n; j++) {
5795           if (roworiented) value = v[i*n+j];
5796           else value = v[i+j*m];
5797           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5798           if (in[j] >= cstart && in[j] < cend) {
5799             col = in[j] - cstart;
5800             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5801           } else if (in[j] < 0) continue;
5802 #if defined(PETSC_USE_DEBUG)
5803           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5804 #endif
5805           else {
5806             if (mat->was_assembled) {
5807               if (!aij->colmap) {
5808                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5809               }
5810 #if defined(PETSC_USE_CTABLE)
5811               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5812               col--;
5813 #else
5814               col = aij->colmap[in[j]] - 1;
5815 #endif
5816               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5817                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5818                 col  =  in[j];
5819                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5820                 B     = aij->B;
5821                 b     = (Mat_SeqAIJ*)B->data;
5822                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5823                 rp2   = bj + bi[row];
5824                 ap2   = ba + bi[row];
5825                 rmax2 = bimax[row];
5826                 nrow2 = bilen[row];
5827                 low2  = 0;
5828                 high2 = nrow2;
5829                 bm    = aij->B->rmap->n;
5830                 ba    = b->a;
5831               }
5832             } else col = in[j];
5833             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5834           }
5835         }
5836       } else if (!aij->donotstash) {
5837         if (roworiented) {
5838           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5839         } else {
5840           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5841         }
5842       }
5843     }
5844   }
5845   PetscFunctionReturnVoid();
5846 }
5847 
5848