xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision d77f618a8e7e0495ca7f234424fb3ee7ee5bf7e2)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
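   Example usage (a minimal sketch; comm, A, the sizes m, n, M, N, and the per-row nonzero
   estimates d_nz and o_nz are placeholders supplied by the caller):

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetType(A,MATAIJ);
      MatSeqAIJSetPreallocation(A,d_nz,NULL);
      MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
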
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically switches over to using inodes when
20    enough inodes exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
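   Example usage (a minimal sketch; comm, A and the size arguments are placeholders supplied by
   the caller; the type is then selected at run time with the option listed below):

      MatCreate(comm,&A);
      MatSetSizes(A,m,n,M,N);
      MatSetFromOptions(A);
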
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = 0;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
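
    Rough usage sketch (names here are illustrative, not taken from this file): rank 0 supplies
    the whole matrix as a SeqAIJ gmat (gmat is only referenced on rank 0), every rank passes the
    number of rows m it should own, and the sum of the m's must equal the global size:

       Mat dmat;
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);
       ... update the numerical values of gmat on rank 0 ...
       MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dmat);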
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the diagonal and off-diagonal nonzero counts for each row */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the diagonal and off-diagonal nonzero counts for each row */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
366 a slightly higher hash table cost; without it, it is not scalable (each process
367 stores an order-N integer array), but access is fast.
368 */
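/*
   A small illustration (values chosen only for exposition): if the off-diagonal block B has
   three local columns corresponding to global columns garray[] = {3,7,12}, the colmap built
   below answers "global column 7 is local column 1".  The stored entries are shifted by one
   (colmap[7] = 2 in the array version; the CTABLE version maps key 7+1 to 2) so that a
   result of 0 can mean "column not present".
*/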
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
391 
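/*
   The two macros below insert a single (row,col,value) entry into the local diagonal (A) or
   off-diagonal (B) SeqAIJ block: a short binary search plus linear scan locates the position
   in the sorted row; an existing entry is added to or overwritten; otherwise, unless new
   nonzeros are being ignored or forbidden (nonew), the row is reallocated if necessary and
   the later entries are shifted up to make room for the new nonzero.
*/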
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
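  /* v[] holds the new numerical values for the entire global row, ordered by increasing
     global column index and matching the existing nonzero pattern: first the off-diagonal
     entries left of the diagonal block, then the diagonal (A) block, then the remaining
     off-diagonal entries; the three PetscMemcpy() calls below split v accordingly */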
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some variables required by the macros */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processes are in either INSERT_VALUES or ADD_VALUES mode */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled; if so, we must
707      also disassemble ourselves, in order that we may reassemble. */
708   /*
709      if the nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled, so we can skip this step
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscSF        sf;
765   PetscInt      *lrows;
766   PetscSFNode   *rrows;
767   PetscInt       r, p = 0, len = 0;
768   PetscErrorCode ierr;
769 
770   PetscFunctionBegin;
771   /* Create SF where leaves are input rows and roots are owned rows */
772   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
773   for (r = 0; r < n; ++r) lrows[r] = -1;
774   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
775   for (r = 0; r < N; ++r) {
776     const PetscInt idx   = rows[r];
777     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
778     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
779       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
780     }
781     if (A->nooffproczerorows) {
782       if (p != mat->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,mat->rank);
783       lrows[len++] = idx - owners[p];
784     } else {
785       rrows[r].rank = p;
786       rrows[r].index = rows[r] - owners[p];
787     }
788   }
789   if (!A->nooffproczerorows) {
790     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
791     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
792     /* Collect flags for rows to be zeroed */
793     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
794     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt*)rows, lrows, MPI_LOR);CHKERRQ(ierr);
795     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
796     /* Compress and put in row numbers */
797     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
798   }
799   /* fix right hand side if needed */
800   if (x && b) {
801     const PetscScalar *xx;
802     PetscScalar       *bb;
803 
804     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
805     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
806     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
807     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
808     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
809   }
810   /* Must zero mat->B before mat->A because the (diag != 0.0) case below may put values into mat->B */
811   ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
812   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
813     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
814   } else if (diag != 0.0) {
815     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
817     for (r = 0; r < len; ++r) {
818       const PetscInt row = lrows[r] + A->rmap->rstart;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
822     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827 
828   /* only change matrix nonzero state if pattern was allowed to be changed */
829   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
830     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
831     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
832   }
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,p = 0,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x) {
888     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
891     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
892   }
893   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
894   /* remove zeroed rows of off diagonal matrix */
895   ii = aij->i;
896   for (i=0; i<len; i++) {
897     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
898   }
899   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
900   if (aij->compressedrow.use) {
901     m    = aij->compressedrow.nrows;
902     ii   = aij->compressedrow.i;
903     ridx = aij->compressedrow.rindex;
904     for (i=0; i<m; i++) {
905       n  = ii[i+1] - ii[i];
906       aj = aij->j + ii[i];
907       aa = aij->a + ii[i];
908 
909       for (j=0; j<n; j++) {
910         if (PetscAbsScalar(mask[*aj])) {
911           if (b) bb[*ridx] -= *aa*xx[*aj];
912           *aa = 0.0;
913         }
914         aa++;
915         aj++;
916       }
917       ridx++;
918     }
919   } else { /* do not use compressed row format */
920     m = l->B->rmap->n;
921     for (i=0; i<m; i++) {
922       n  = ii[i+1] - ii[i];
923       aj = aij->j + ii[i];
924       aa = aij->a + ii[i];
925       for (j=0; j<n; j++) {
926         if (PetscAbsScalar(mask[*aj])) {
927           if (b) bb[i] -= *aa*xx[*aj];
928           *aa = 0.0;
929         }
930         aa++;
931         aj++;
932       }
933     }
934   }
935   if (x) {
936     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
937     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
938   }
939   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
940   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
941   ierr = PetscFree(lrows);CHKERRQ(ierr);
942 
943   /* only change matrix nonzero state if pattern was allowed to be changed */
944   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
945     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
946     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
947   }
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMult_MPIAIJ"
953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
954 {
955   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
956   PetscErrorCode ierr;
957   PetscInt       nt;
958 
959   PetscFunctionBegin;
960   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
961   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
962   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
963   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
972 {
973   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatMultAdd_MPIAIJ"
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 #undef __FUNCT__
997 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
998 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
999 {
1000   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1001   PetscErrorCode ierr;
1002   PetscBool      merged;
1003 
1004   PetscFunctionBegin;
1005   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1006   /* do nondiagonal part */
1007   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1008   if (!merged) {
1009     /* send it on its way */
1010     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1011     /* do local part */
1012     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1013     /* receive remote parts: note this assumes the values are not actually */
1014     /* added into yy until the next line */
1015     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1016   } else {
1017     /* do local part */
1018     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1019     /* send it on its way */
1020     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1021     /* values actually were received in the Begin() but we need to call this nop */
1022     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1023   }
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1029 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1030 {
1031   MPI_Comm       comm;
1032   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1033   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1034   IS             Me,Notme;
1035   PetscErrorCode ierr;
1036   PetscInt       M,N,first,last,*notme,i;
1037   PetscMPIInt    size;
1038 
1039   PetscFunctionBegin;
1040   /* Easy test: check that the diagonal blocks are transposes of each other */
1041   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1042   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1043   if (!*f) PetscFunctionReturn(0);
1044   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1046   if (size == 1) PetscFunctionReturn(0);
1047 
1048   /* Hard test: off-diagonal block. This requires MatGetSubMatrices(). */
1049   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1050   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1051   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1052   for (i=0; i<first; i++) notme[i] = i;
1053   for (i=last; i<M; i++) notme[i-last+first] = i;
1054   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1055   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1056   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1057   Aoff = Aoffs[0];
1058   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1059   Boff = Boffs[0];
1060   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1061   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1062   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1063   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1064   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1065   ierr = PetscFree(notme);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1074   PetscErrorCode ierr;
1075 
1076   PetscFunctionBegin;
1077   /* do nondiagonal part */
1078   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1079   /* send it on its way */
1080   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1081   /* do local part */
1082   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* receive remote parts */
1084   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 /*
1089   This only works correctly for square matrices where the subblock A->A is the
1090    diagonal block
1091 */
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1095 {
1096   PetscErrorCode ierr;
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098 
1099   PetscFunctionBegin;
1100   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1101   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1102   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatScale_MPIAIJ"
1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1109 {
1110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1111   PetscErrorCode ierr;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1115   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatDestroy_Redundant"
1121 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1122 {
1123   PetscErrorCode ierr;
1124   Mat_Redundant  *redund = *redundant;
1125   PetscInt       i;
1126 
1127   PetscFunctionBegin;
1128   *redundant = NULL;
1129   if (redund){
1130     if (redund->matseq) { /* via MatGetSubMatrices()  */
1131       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1132       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1133       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1134       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1135     } else {
1136       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1137       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1138       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1139       for (i=0; i<redund->nrecvs; i++) {
1140         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1141         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1142       }
1143       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1144     }
1145 
1146     if (redund->psubcomm) {
1147       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1148     }
1149     ierr = PetscFree(redund);CHKERRQ(ierr);
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatDestroy_MPIAIJ"
1156 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1157 {
1158   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1159   PetscErrorCode ierr;
1160 
1161   PetscFunctionBegin;
1162 #if defined(PETSC_USE_LOG)
1163   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1164 #endif
1165   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1166   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1167   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1168   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1169   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1170 #if defined(PETSC_USE_CTABLE)
1171   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1172 #else
1173   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1174 #endif
1175   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1176   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1177   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1178   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1179   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1180   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1181 
1182   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1183   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1184   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1185   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1186   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1187   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1188   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1189   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1190   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1196 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1197 {
1198   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1199   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1200   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1201   PetscErrorCode ierr;
1202   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1203   int            fd;
1204   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1205   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1206   PetscScalar    *column_values;
1207   PetscInt       message_count,flowcontrolcount;
1208   FILE           *file;
1209 
1210   PetscFunctionBegin;
1211   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1212   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1213   nz   = A->nz + B->nz;
1214   if (!rank) {
1215     header[0] = MAT_FILE_CLASSID;
1216     header[1] = mat->rmap->N;
1217     header[2] = mat->cmap->N;
1218 
1219     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1220     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1221     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     /* get largest number of rows any processor has */
1223     rlen  = mat->rmap->n;
1224     range = mat->rmap->range;
1225     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1226   } else {
1227     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     rlen = mat->rmap->n;
1229   }
1230 
1231   /* load up the local row counts */
1232   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1233   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1234 
1235   /* store the row lengths to the file */
1236   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1237   if (!rank) {
1238     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1239     for (i=1; i<size; i++) {
1240       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1241       rlen = range[i+1] - range[i];
1242       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1243       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1244     }
1245     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1246   } else {
1247     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1248     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1249     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1250   }
1251   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1252 
1253   /* load up the local column indices */
1254   nzmax = nz; /* this process needs space as large as the largest process needs */
1255   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1256   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1257   cnt   = 0;
1258   for (i=0; i<mat->rmap->n; i++) {
1259     for (j=B->i[i]; j<B->i[i+1]; j++) {
1260       if ((col = garray[B->j[j]]) > cstart) break;
1261       column_indices[cnt++] = col;
1262     }
1263     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1264     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1265   }
1266   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1267 
1268   /* store the column indices to the file */
1269   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1270   if (!rank) {
1271     MPI_Status status;
1272     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1273     for (i=1; i<size; i++) {
1274       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1275       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1276       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1277       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1278       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1279     }
1280     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1281   } else {
1282     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1283     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1284     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1285     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1286   }
1287   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1288 
1289   /* load up the local column values */
1290   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1291   cnt  = 0;
1292   for (i=0; i<mat->rmap->n; i++) {
1293     for (j=B->i[i]; j<B->i[i+1]; j++) {
1294       if (garray[B->j[j]] > cstart) break;
1295       column_values[cnt++] = B->a[j];
1296     }
1297     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1298     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1299   }
1300   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1301 
1302   /* store the column values to the file */
1303   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1304   if (!rank) {
1305     MPI_Status status;
1306     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1307     for (i=1; i<size; i++) {
1308       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1309       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1310       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1311       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1312       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1313     }
1314     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1315   } else {
1316     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1317     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1318     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1319     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1320   }
1321   ierr = PetscFree(column_values);CHKERRQ(ierr);
1322 
1323   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1324   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1325   PetscFunctionReturn(0);
1326 }
1327 
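/*
   Illustrative usage sketch (assumptions: an already assembled MPIAIJ matrix A and the file name
   "A.dat"; the snippet is not part of this file).  MatView() on a binary viewer dispatches to
   MatView_MPIAIJ_Binary() above, and the file can be read back with MatLoad():

     PetscViewer viewer;
     Mat         B;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
     ierr = MatView(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetType(B,MATAIJ);CHKERRQ(ierr);
     ierr = MatLoad(B,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/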
1328 #include <petscdraw.h>
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1331 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1332 {
1333   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1334   PetscErrorCode    ierr;
1335   PetscMPIInt       rank = aij->rank,size = aij->size;
1336   PetscBool         isdraw,iascii,isbinary;
1337   PetscViewer       sviewer;
1338   PetscViewerFormat format;
1339 
1340   PetscFunctionBegin;
1341   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1342   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1343   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1344   if (iascii) {
1345     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1346     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1347       MatInfo   info;
1348       PetscBool inodes;
1349 
1350       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1351       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1352       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1353       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1354       if (!inodes) {
1355         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1356                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1357       } else {
1358         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1359                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1360       }
1361       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1362       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1363       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1365       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1366       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1367       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1368       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1369       PetscFunctionReturn(0);
1370     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1371       PetscInt inodecount,inodelimit,*inodes;
1372       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1373       if (inodes) {
1374         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1375       } else {
1376         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1377       }
1378       PetscFunctionReturn(0);
1379     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1380       PetscFunctionReturn(0);
1381     }
1382   } else if (isbinary) {
1383     if (size == 1) {
1384       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1385       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1386     } else {
1387       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1388     }
1389     PetscFunctionReturn(0);
1390   } else if (isdraw) {
1391     PetscDraw draw;
1392     PetscBool isnull;
1393     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1394     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1395   }
1396 
1397   {
1398     /* assemble the entire matrix onto first processor. */
1399     Mat        A;
1400     Mat_SeqAIJ *Aloc;
1401     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1402     MatScalar  *a;
1403 
1404     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1405     if (!rank) {
1406       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1407     } else {
1408       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1409     }
1410     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1411     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1412     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1413     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1414     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1415 
1416     /* copy over the A part */
1417     Aloc = (Mat_SeqAIJ*)aij->A->data;
1418     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1419     row  = mat->rmap->rstart;
1420     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1421     for (i=0; i<m; i++) {
1422       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1423       row++;
1424       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1425     }
1426     aj = Aloc->j;
1427     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1428 
1429     /* copy over the B part */
1430     Aloc = (Mat_SeqAIJ*)aij->B->data;
1431     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1432     row  = mat->rmap->rstart;
1433     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1434     ct   = cols;
1435     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1436     for (i=0; i<m; i++) {
1437       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1438       row++;
1439       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1440     }
1441     ierr = PetscFree(ct);CHKERRQ(ierr);
1442     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1443     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1444     /*
1445        Everyone has to participate in the calls below (even though only rank 0 actually draws),
1446        since the graphics waits are synchronized across all processes that share the PetscDraw object
1447     */
1448     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1449     if (!rank) {
1450       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1451     }
1452     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1453     ierr = MatDestroy(&A);CHKERRQ(ierr);
1454   }
1455   PetscFunctionReturn(0);
1456 }
1457 
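/*
   Illustrative usage sketch (assumes an assembled MPIAIJ matrix A): pushing the ASCII_INFO_DETAIL
   format on the standard viewer exercises the per-process branch of the routine above.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   The same output is typically available from the command line with -mat_view ::ascii_info_detail.
*/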
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatView_MPIAIJ"
1460 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1461 {
1462   PetscErrorCode ierr;
1463   PetscBool      iascii,isdraw,issocket,isbinary;
1464 
1465   PetscFunctionBegin;
1466   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1467   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1468   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1469   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1470   if (iascii || isdraw || isbinary || issocket) {
1471     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1472   }
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatSOR_MPIAIJ"
1478 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1479 {
1480   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1481   PetscErrorCode ierr;
1482   Vec            bb1 = 0;
1483   PetscBool      hasop;
1484 
1485   PetscFunctionBegin;
1486   if (flag == SOR_APPLY_UPPER) {
1487     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1488     PetscFunctionReturn(0);
1489   }
1490 
1491   if (its > 1 || (~flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) { /* a work vector is needed for the updated right-hand side */
1492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1493   }
1494 
1495   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1496     if (flag & SOR_ZERO_INITIAL_GUESS) {
1497       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1498       its--;
1499     }
1500 
1501     while (its--) {
1502       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1503       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1504 
1505       /* update rhs: bb1 = bb - B*x */
1506       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1507       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1508 
1509       /* local sweep */
1510       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1511     }
1512   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1513     if (flag & SOR_ZERO_INITIAL_GUESS) {
1514       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1515       its--;
1516     }
1517     while (its--) {
1518       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1519       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520 
1521       /* update rhs: bb1 = bb - B*x */
1522       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1523       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1524 
1525       /* local sweep */
1526       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1527     }
1528   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1529     if (flag & SOR_ZERO_INITIAL_GUESS) {
1530       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1531       its--;
1532     }
1533     while (its--) {
1534       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536 
1537       /* update rhs: bb1 = bb - B*x */
1538       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1539       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1540 
1541       /* local sweep */
1542       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1543     }
1544   } else if (flag & SOR_EISENSTAT) {
1545     Vec xx1;
1546 
1547     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1549 
1550     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1551     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1552     if (!mat->diag) {
1553       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1554       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1555     }
1556     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1557     if (hasop) {
1558       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1559     } else {
1560       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1561     }
1562     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1563 
1564     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1565 
1566     /* local sweep */
1567     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1568     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1569     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1570   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1571 
1572   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1573   PetscFunctionReturn(0);
1574 }
1575 
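/*
   Sketch of the iteration applied above for the SOR_LOCAL_* flags (notation assumed for exposition):
   writing the parallel matrix as M = A + B, with A the diagonal block and B the off-diagonal block,
   each outer iteration forms the updated right-hand side

       bb1 = bb - B*x_off

   where x_off is the scattered off-process part of x, and then runs 'lits' local SOR sweeps with A
   on bb1.  This is processor-local (block Jacobi style) SOR, which is why a true parallel SOR is
   reported as unsupported at the end of the routine.
*/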
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatPermute_MPIAIJ"
1578 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1579 {
1580   Mat            aA,aB,Aperm;
1581   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1582   PetscScalar    *aa,*ba;
1583   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1584   PetscSF        rowsf,sf;
1585   IS             parcolp = NULL;
1586   PetscBool      done;
1587   PetscErrorCode ierr;
1588 
1589   PetscFunctionBegin;
1590   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1591   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1592   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1593   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1594 
1595   /* Invert row permutation to find out where my rows should go */
1596   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1597   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1598   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1599   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1600   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1601   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1602 
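  /*
     Illustrative example (sizes and permutation assumed for exposition): with two processes owning
     rows {0,1} and {2,3} and rowp = {2,3,0,1}, row i of the result is row rowp[i] of A.  The reduce
     above sends each destination index rstart+i to the owner of row rwant[i], so afterwards rdest is
     {2,3} on the first process and {0,1} on the second: rdest[i] is the global row of the permuted
     matrix into which my local row i must be inserted.
  */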
1603   /* Invert column permutation to find out where my columns should go */
1604   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1605   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1606   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1607   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1608   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1609   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1610   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1611 
1612   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1613   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1614   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1615 
1616   /* Find out where my gcols should go */
1617   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1618   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1619   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1620   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1621   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1622   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1623   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1624   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1625 
1626   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1627   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1628   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1629   for (i=0; i<m; i++) {
1630     PetscInt row = rdest[i],rowner;
1631     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       PetscInt cowner,col = cdest[aj[j]];
1634       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1635       if (rowner == cowner) dnnz[i]++;
1636       else onnz[i]++;
1637     }
1638     for (j=bi[i]; j<bi[i+1]; j++) {
1639       PetscInt cowner,col = gcdest[bj[j]];
1640       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1641       if (rowner == cowner) dnnz[i]++;
1642       else onnz[i]++;
1643     }
1644   }
1645   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1646   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1647   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1648   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1649   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1650 
1651   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1652   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1653   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1654   for (i=0; i<m; i++) {
1655     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1656     PetscInt j0,rowlen;
1657     rowlen = ai[i+1] - ai[i];
1658     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than m (the length of the work arrays), so insert in batches */
1659       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1660       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1661     }
1662     rowlen = bi[i+1] - bi[i];
1663     for (j0=j=0; j<rowlen; j0=j) {
1664       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1665       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1666     }
1667   }
1668   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1671   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1672   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1673   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1674   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1675   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1676   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1677   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1678   *B = Aperm;
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1684 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1685 {
1686   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1687   Mat            A    = mat->A,B = mat->B;
1688   PetscErrorCode ierr;
1689   PetscReal      isend[5],irecv[5];
1690 
1691   PetscFunctionBegin;
1692   info->block_size = 1.0;
1693   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1694 
1695   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1696   isend[3] = info->memory;  isend[4] = info->mallocs;
1697 
1698   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1699 
1700   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1701   isend[3] += info->memory;  isend[4] += info->mallocs;
1702   if (flag == MAT_LOCAL) {
1703     info->nz_used      = isend[0];
1704     info->nz_allocated = isend[1];
1705     info->nz_unneeded  = isend[2];
1706     info->memory       = isend[3];
1707     info->mallocs      = isend[4];
1708   } else if (flag == MAT_GLOBAL_MAX) {
1709     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1710 
1711     info->nz_used      = irecv[0];
1712     info->nz_allocated = irecv[1];
1713     info->nz_unneeded  = irecv[2];
1714     info->memory       = irecv[3];
1715     info->mallocs      = irecv[4];
1716   } else if (flag == MAT_GLOBAL_SUM) {
1717     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1718 
1719     info->nz_used      = irecv[0];
1720     info->nz_allocated = irecv[1];
1721     info->nz_unneeded  = irecv[2];
1722     info->memory       = irecv[3];
1723     info->mallocs      = irecv[4];
1724   }
1725   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1726   info->fill_ratio_needed = 0;
1727   info->factor_mallocs    = 0;
1728   PetscFunctionReturn(0);
1729 }
1730 
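/*
   Illustrative usage sketch (assumes an assembled MPIAIJ matrix A): MAT_GLOBAL_SUM triggers the
   MPI_Allreduce() branch above, so every process receives the totals over all processes.

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/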
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatSetOption_MPIAIJ"
1733 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1734 {
1735   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1736   PetscErrorCode ierr;
1737 
1738   PetscFunctionBegin;
1739   switch (op) {
1740   case MAT_NEW_NONZERO_LOCATIONS:
1741   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1742   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1743   case MAT_KEEP_NONZERO_PATTERN:
1744   case MAT_NEW_NONZERO_LOCATION_ERR:
1745   case MAT_USE_INODES:
1746   case MAT_IGNORE_ZERO_ENTRIES:
1747     MatCheckPreallocated(A,1);
1748     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1749     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1750     break;
1751   case MAT_ROW_ORIENTED:
1752     a->roworiented = flg;
1753 
1754     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1755     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1756     break;
1757   case MAT_NEW_DIAGONALS:
1758     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1759     break;
1760   case MAT_IGNORE_OFF_PROC_ENTRIES:
1761     a->donotstash = flg;
1762     break;
1763   case MAT_SPD:
1764     A->spd_set = PETSC_TRUE;
1765     A->spd     = flg;
1766     if (flg) {
1767       A->symmetric                  = PETSC_TRUE;
1768       A->structurally_symmetric     = PETSC_TRUE;
1769       A->symmetric_set              = PETSC_TRUE;
1770       A->structurally_symmetric_set = PETSC_TRUE;
1771     }
1772     break;
1773   case MAT_SYMMETRIC:
1774     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1775     break;
1776   case MAT_STRUCTURALLY_SYMMETRIC:
1777     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1778     break;
1779   case MAT_HERMITIAN:
1780     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1781     break;
1782   case MAT_SYMMETRY_ETERNAL:
1783     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1784     break;
1785   default:
1786     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1787   }
1788   PetscFunctionReturn(0);
1789 }
1790 
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatGetRow_MPIAIJ"
1793 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1794 {
1795   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1796   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1797   PetscErrorCode ierr;
1798   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1799   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1800   PetscInt       *cmap,*idx_p;
1801 
1802   PetscFunctionBegin;
1803   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1804   mat->getrowactive = PETSC_TRUE;
1805 
1806   if (!mat->rowvalues && (idx || v)) {
1807     /*
1808         allocate enough space to hold information from the longest row.
1809     */
1810     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1811     PetscInt   max = 1,tmp;
1812     for (i=0; i<matin->rmap->n; i++) {
1813       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1814       if (max < tmp) max = tmp;
1815     }
1816     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1817   }
1818 
1819   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1820   lrow = row - rstart;
1821 
1822   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1823   if (!v)   {pvA = 0; pvB = 0;}
1824   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1825   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1826   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1827   nztot = nzA + nzB;
1828 
1829   cmap = mat->garray;
1830   if (v  || idx) {
1831     if (nztot) {
1832       /* Sort by increasing column numbers, assuming A and B already sorted */
1833       PetscInt imark = -1;
1834       if (v) {
1835         *v = v_p = mat->rowvalues;
1836         for (i=0; i<nzB; i++) {
1837           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1838           else break;
1839         }
1840         imark = i;
1841         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1842         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1843       }
1844       if (idx) {
1845         *idx = idx_p = mat->rowindices;
1846         if (imark > -1) {
1847           for (i=0; i<imark; i++) {
1848             idx_p[i] = cmap[cworkB[i]];
1849           }
1850         } else {
1851           for (i=0; i<nzB; i++) {
1852             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1853             else break;
1854           }
1855           imark = i;
1856         }
1857         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1858         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1859       }
1860     } else {
1861       if (idx) *idx = 0;
1862       if (v)   *v   = 0;
1863     }
1864   }
1865   *nz  = nztot;
1866   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1867   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
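/*
   Illustrative usage sketch (assumes an assembled MPIAIJ matrix A): only locally owned rows may be
   requested, and each MatGetRow() must be paired with a MatRestoreRow() before the next row is fetched.

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/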
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1873 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1874 {
1875   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1876 
1877   PetscFunctionBegin;
1878   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1879   aij->getrowactive = PETSC_FALSE;
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatNorm_MPIAIJ"
1885 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1886 {
1887   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1888   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1889   PetscErrorCode ierr;
1890   PetscInt       i,j,cstart = mat->cmap->rstart;
1891   PetscReal      sum = 0.0;
1892   MatScalar      *v;
1893 
1894   PetscFunctionBegin;
1895   if (aij->size == 1) {
1896     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1897   } else {
1898     if (type == NORM_FROBENIUS) {
1899       v = amat->a;
1900       for (i=0; i<amat->nz; i++) {
1901         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1902       }
1903       v = bmat->a;
1904       for (i=0; i<bmat->nz; i++) {
1905         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1906       }
1907       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1908       *norm = PetscSqrtReal(*norm);
1909     } else if (type == NORM_1) { /* max column norm */
1910       PetscReal *tmp,*tmp2;
1911       PetscInt  *jj,*garray = aij->garray;
1912       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1913       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1914       *norm = 0.0;
1915       v     = amat->a; jj = amat->j;
1916       for (j=0; j<amat->nz; j++) {
1917         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1918       }
1919       v = bmat->a; jj = bmat->j;
1920       for (j=0; j<bmat->nz; j++) {
1921         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1922       }
1923       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1924       for (j=0; j<mat->cmap->N; j++) {
1925         if (tmp2[j] > *norm) *norm = tmp2[j];
1926       }
1927       ierr = PetscFree(tmp);CHKERRQ(ierr);
1928       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1929     } else if (type == NORM_INFINITY) { /* max row norm */
1930       PetscReal ntemp = 0.0;
1931       for (j=0; j<aij->A->rmap->n; j++) {
1932         v   = amat->a + amat->i[j];
1933         sum = 0.0;
1934         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1935           sum += PetscAbsScalar(*v); v++;
1936         }
1937         v = bmat->a + bmat->i[j];
1938         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1939           sum += PetscAbsScalar(*v); v++;
1940         }
1941         if (sum > ntemp) ntemp = sum;
1942       }
1943       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1944     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
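/*
   The norms computed above (standard definitions, restated for reference):
       NORM_FROBENIUS:  ||A||_F   = sqrt( sum_{i,j} |a_ij|^2 )   (local sums over the A and B parts, then MPI sum)
       NORM_1:          ||A||_1   = max_j sum_i |a_ij|           (column sums accumulated globally)
       NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|           (row sums, then MPI max)
*/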
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatTranspose_MPIAIJ"
1951 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1952 {
1953   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1954   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1955   PetscErrorCode ierr;
1956   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1957   PetscInt       cstart = A->cmap->rstart,ncol;
1958   Mat            B;
1959   MatScalar      *array;
1960 
1961   PetscFunctionBegin;
1962   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1963 
1964   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1965   ai = Aloc->i; aj = Aloc->j;
1966   bi = Bloc->i; bj = Bloc->j;
1967   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1968     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1969     PetscSFNode          *oloc;
1970     PETSC_UNUSED PetscSF sf;
1971 
1972     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1973     /* compute d_nnz for preallocation */
1974     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1975     for (i=0; i<ai[ma]; i++) {
1976       d_nnz[aj[i]]++;
1977       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1978     }
1979     /* compute local off-diagonal contributions */
1980     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1981     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1982     /* map those to global */
1983     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1984     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1985     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1986     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1987     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1988     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1989     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1990 
1991     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1992     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1993     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1994     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1995     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1996     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1997   } else {
1998     B    = *matout;
1999     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2000     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2001   }
2002 
2003   /* copy over the A part */
2004   array = Aloc->a;
2005   row   = A->rmap->rstart;
2006   for (i=0; i<ma; i++) {
2007     ncol = ai[i+1]-ai[i];
2008     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2009     row++;
2010     array += ncol; aj += ncol;
2011   }
2012   aj = Aloc->j;
2013   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* restore the local column indices */
2014 
2015   /* copy over the B part */
2016   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2017   array = Bloc->a;
2018   row   = A->rmap->rstart;
2019   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2020   cols_tmp = cols;
2021   for (i=0; i<mb; i++) {
2022     ncol = bi[i+1]-bi[i];
2023     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2024     row++;
2025     array += ncol; cols_tmp += ncol;
2026   }
2027   ierr = PetscFree(cols);CHKERRQ(ierr);
2028 
2029   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2030   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2032     *matout = B;
2033   } else {
2034     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2035   }
2036   PetscFunctionReturn(0);
2037 }
2038 
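/*
   Illustrative usage sketch (assumes an assembled MPIAIJ matrix A): MAT_INITIAL_MATRIX creates the
   transpose in a new matrix, while passing the matrix itself with MAT_REUSE_MATRIX performs the
   in-place transpose handled by the MatHeaderMerge() branch above (square matrices only).

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);

     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/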
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2041 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2042 {
2043   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2044   Mat            a    = aij->A,b = aij->B;
2045   PetscErrorCode ierr;
2046   PetscInt       s1,s2,s3;
2047 
2048   PetscFunctionBegin;
2049   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2050   if (rr) {
2051     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2052     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2053     /* Overlap communication with computation. */
2054     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2055   }
2056   if (ll) {
2057     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2058     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2059     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2060   }
2061   /* scale the diagonal block */
2062   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2063 
2064   if (rr) {
2065     /* Do a scatter end and then right scale the off-diagonal block */
2066     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2067     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2068   }
2069   PetscFunctionReturn(0);
2070 }
2071 
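/*
   Illustrative usage sketch (assumes vectors l and r conforming to the row and column layouts of A):
   the routine above computes A := diag(l) * A * diag(r), overlapping the scatter of r with the
   scaling of the diagonal block.  Either argument may be NULL to scale only one side.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,NULL,r);CHKERRQ(ierr);
*/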
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2074 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2075 {
2076   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2077   PetscErrorCode ierr;
2078 
2079   PetscFunctionBegin;
2080   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2081   PetscFunctionReturn(0);
2082 }
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatEqual_MPIAIJ"
2086 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2087 {
2088   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2089   Mat            a,b,c,d;
2090   PetscBool      flg;
2091   PetscErrorCode ierr;
2092 
2093   PetscFunctionBegin;
2094   a = matA->A; b = matA->B;
2095   c = matB->A; d = matB->B;
2096 
2097   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2098   if (flg) {
2099     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2100   }
2101   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2102   PetscFunctionReturn(0);
2103 }
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatCopy_MPIAIJ"
2107 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2108 {
2109   PetscErrorCode ierr;
2110   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2111   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2112 
2113   PetscFunctionBegin;
2114   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2115   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2116     /* Because of the column compression in the off-processor part of the matrix a->B,
2117        the number of columns in a->B and b->B may differ, hence we cannot call
2118        MatCopy() directly on the two parts. If need be, a copy more efficient than
2119        MatCopy_Basic() could be provided by first uncompressing the a->B matrices
2120        and then copying the submatrices. */
2121     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2122   } else {
2123     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2124     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2125   }
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 #undef __FUNCT__
2130 #define __FUNCT__ "MatSetUp_MPIAIJ"
2131 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2132 {
2133   PetscErrorCode ierr;
2134 
2135   PetscFunctionBegin;
2136   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 /*
2141    Computes the number of nonzeros per row needed for preallocation when X and Y
2142    have different nonzero structure.
2143 */
2144 #undef __FUNCT__
2145 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2146 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2147 {
2148   PetscInt       i,j,k,nzx,nzy;
2149 
2150   PetscFunctionBegin;
2151   /* Set the number of nonzeros in the new matrix */
2152   for (i=0; i<m; i++) {
2153     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2154     nzx = xi[i+1] - xi[i];
2155     nzy = yi[i+1] - yi[i];
2156     nnz[i] = 0;
2157     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2158       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2159       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2160       nnz[i]++;
2161     }
2162     for (; k<nzy; k++) nnz[i]++;
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
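/*
   Illustrative example (numbers assumed for exposition): if row i of X has global columns {1,4,7}
   and row i of Y has global columns {2,4,9}, the merge above counts the union {1,2,4,7,9}, so
   nnz[i] = 5; the duplicate column 4 is counted only once.
*/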
2167 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2170 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2171 {
2172   PetscErrorCode ierr;
2173   PetscInt       m = Y->rmap->N;
2174   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2175   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2176 
2177   PetscFunctionBegin;
2178   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatAXPY_MPIAIJ"
2184 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2185 {
2186   PetscErrorCode ierr;
2187   PetscInt       i;
2188   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2189   PetscBLASInt   bnz,one=1;
2190   Mat_SeqAIJ     *x,*y;
2191 
2192   PetscFunctionBegin;
2193   if (str == SAME_NONZERO_PATTERN) {
2194     PetscScalar alpha = a;
2195     x    = (Mat_SeqAIJ*)xx->A->data;
2196     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2197     y    = (Mat_SeqAIJ*)yy->A->data;
2198     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2199     x    = (Mat_SeqAIJ*)xx->B->data;
2200     y    = (Mat_SeqAIJ*)yy->B->data;
2201     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2202     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2203     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2204   } else if (str == SUBSET_NONZERO_PATTERN) {
2205     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2206 
2207     x = (Mat_SeqAIJ*)xx->B->data;
2208     y = (Mat_SeqAIJ*)yy->B->data;
2209     if (y->xtoy && y->XtoY != xx->B) {
2210       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2211       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2212     }
2213     if (!y->xtoy) { /* get xtoy */
2214       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2215       y->XtoY = xx->B;
2216       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2217     }
2218     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2219     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2220   } else {
2221     Mat      B;
2222     PetscInt *nnz_d,*nnz_o;
2223     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2224     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2225     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2226     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2227     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2228     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2229     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2230     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2231     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2232     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2233     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2234     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2235     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2236     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2237   }
2238   PetscFunctionReturn(0);
2239 }
2240 
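/*
   Illustrative usage sketch (assumes assembled MPIAIJ matrices X and Y of the same sizes): computes
   Y = Y + a*X.  With DIFFERENT_NONZERO_PATTERN the branch above preallocates a new matrix for the
   union of the two nonzero patterns and replaces Y's data with it.

     PetscScalar a = 2.0;

     ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/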
2241 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatConjugate_MPIAIJ"
2245 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2246 {
2247 #if defined(PETSC_USE_COMPLEX)
2248   PetscErrorCode ierr;
2249   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2250 
2251   PetscFunctionBegin;
2252   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2253   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2254 #else
2255   PetscFunctionBegin;
2256 #endif
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatRealPart_MPIAIJ"
2262 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2263 {
2264   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2265   PetscErrorCode ierr;
2266 
2267   PetscFunctionBegin;
2268   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2269   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2275 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2276 {
2277   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2278   PetscErrorCode ierr;
2279 
2280   PetscFunctionBegin;
2281   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2282   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #if defined(PETSC_HAVE_PBGL)
2287 
2288 #include <boost/parallel/mpi/bsp_process_group.hpp>
2289 #include <boost/graph/distributed/ilu_default_graph.hpp>
2290 #include <boost/graph/distributed/ilu_0_block.hpp>
2291 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2292 #include <boost/graph/distributed/petsc/interface.hpp>
2293 #include <boost/multi_array.hpp>
2294 #include <boost/parallel/distributed_property_map.hpp>
2295 
2296 #undef __FUNCT__
2297 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2298 /*
2299   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2300 */
2301 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2302 {
2303   namespace petsc = boost::distributed::petsc;
2304 
2305   namespace graph_dist = boost::graph::distributed;
2306   using boost::graph::distributed::ilu_default::process_group_type;
2307   using boost::graph::ilu_permuted;
2308 
2309   PetscBool      row_identity, col_identity;
2310   PetscContainer c;
2311   PetscInt       m, n, M, N;
2312   PetscErrorCode ierr;
2313 
2314   PetscFunctionBegin;
2315   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2316   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2317   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2318   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2319 
2320   process_group_type pg;
2321   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2322   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2323   lgraph_type& level_graph = *lgraph_p;
2324   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2325 
2326   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2327   ilu_permuted(level_graph);
2328 
2329   /* put together the new matrix */
2330   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2331   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2332   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2333   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2334   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2335   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2336   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2337   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338 
2339   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2340   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2341   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2342   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2348 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2349 {
2350   PetscFunctionBegin;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatSolve_MPIAIJ"
2356 /*
2357   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2358 */
2359 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2360 {
2361   namespace graph_dist = boost::graph::distributed;
2362 
2363   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2364   lgraph_type    *lgraph_p;
2365   PetscContainer c;
2366   PetscErrorCode ierr;
2367 
2368   PetscFunctionBegin;
2369   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2370   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2371   ierr = VecCopy(b, x);CHKERRQ(ierr);
2372 
2373   PetscScalar *array_x;
2374   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2375   PetscInt sx;
2376   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2377 
2378   PetscScalar *array_b;
2379   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2380   PetscInt sb;
2381   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2382 
2383   lgraph_type& level_graph = *lgraph_p;
2384   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2385 
2386   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2387   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2388   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2389 
2390   typedef boost::iterator_property_map<array_ref_type::iterator,
2391                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2392   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2393   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2394 
2395   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2396   PetscFunctionReturn(0);
2397 }
2398 #endif
2399 
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2403 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2404 {
2405   PetscMPIInt    rank,size;
2406   MPI_Comm       comm;
2407   PetscErrorCode ierr;
2408   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2409   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2410   PetscInt       *rowrange = mat->rmap->range;
2411   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2412   Mat            A = aij->A,B=aij->B,C=*matredundant;
2413   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2414   PetscScalar    *sbuf_a;
2415   PetscInt       nzlocal=a->nz+b->nz;
2416   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2417   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2418   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2419   MatScalar      *aworkA,*aworkB;
2420   PetscScalar    *vals;
2421   PetscMPIInt    tag1,tag2,tag3,imdex;
2422   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2423   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2424   MPI_Status     recv_status,*send_status;
2425   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2426   PetscInt       **rbuf_j=NULL;
2427   PetscScalar    **rbuf_a=NULL;
2428   Mat_Redundant  *redund =NULL;
2429 
2430   PetscFunctionBegin;
2431   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2432   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2433   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2434   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2435   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2436 
2437   if (reuse == MAT_REUSE_MATRIX) {
2438     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2439     if (subsize == 1) {
2440       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2441       redund = c->redundant;
2442     } else {
2443       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2444       redund = c->redundant;
2445     }
2446     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2447 
2448     nsends    = redund->nsends;
2449     nrecvs    = redund->nrecvs;
2450     send_rank = redund->send_rank;
2451     recv_rank = redund->recv_rank;
2452     sbuf_nz   = redund->sbuf_nz;
2453     rbuf_nz   = redund->rbuf_nz;
2454     sbuf_j    = redund->sbuf_j;
2455     sbuf_a    = redund->sbuf_a;
2456     rbuf_j    = redund->rbuf_j;
2457     rbuf_a    = redund->rbuf_a;
2458   }
2459 
2460   if (reuse == MAT_INITIAL_MATRIX) {
2461     PetscInt    nleftover,np_subcomm;
2462 
2463     /* get the destination processors' id send_rank, nsends and nrecvs */
2464     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2465 
2466     np_subcomm = size/nsubcomm;
2467     nleftover  = size - nsubcomm*np_subcomm;
2468 
2469     /* the block of code below is specific to the INTERLACED layout */
2470     /* ------------------------------------------------*/
2471     nsends = 0; nrecvs = 0;
2472     for (i=0; i<size; i++) {
2473       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2474         send_rank[nsends++] = i;
2475         recv_rank[nrecvs++] = i;
2476       }
2477     }
2478     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2479       i = size-nleftover-1;
2480       j = 0;
2481       while (j < nsubcomm - nleftover) {
2482         send_rank[nsends++] = i;
2483         i--; j++;
2484       }
2485     }
2486 
2487     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2488       for (i=0; i<nleftover; i++) {
2489         recv_rank[nrecvs++] = size-nleftover+i;
2490       }
2491     }
2492     /*----------------------------------------------*/
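    /*
       Illustrative example (assumes the interlaced layout places rank r in subcommunicator r % nsubcomm
       with subrank r/nsubcomm): with size = 5 and nsubcomm = 2, np_subcomm = 2 and nleftover = 1, so the
       subcommunicators are {0,2,4} and {1,3}.  Ranks with equal subrank exchange their local rows
       (0 with 1, 2 with 3), while leftover rank 4 sends its rows to rank 3, the last rank of the
       subcommunicator that does not already contain rank 4.
    */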
2493 
2494     /* allocate sbuf_j, sbuf_a */
2495     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2496     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2497     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2498     /*
2499     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2500     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2501      */
2502   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2503 
2504   /* copy mat's local entries into the buffers */
2505   if (reuse == MAT_INITIAL_MATRIX) {
2506     rownz_max = 0;
2507     rptr      = sbuf_j;
2508     cols      = sbuf_j + rend-rstart + 1;
2509     vals      = sbuf_a;
2510     rptr[0]   = 0;
2511     for (i=0; i<rend-rstart; i++) {
2512       row    = i + rstart;
2513       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2514       ncols  = nzA + nzB;
2515       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2516       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2517       /* load the column indices for this row into cols */
2518       lwrite = 0;
2519       for (l=0; l<nzB; l++) {
2520         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2521           vals[lwrite]   = aworkB[l];
2522           cols[lwrite++] = ctmp;
2523         }
2524       }
2525       for (l=0; l<nzA; l++) {
2526         vals[lwrite]   = aworkA[l];
2527         cols[lwrite++] = cstart + cworkA[l];
2528       }
2529       for (l=0; l<nzB; l++) {
2530         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2531           vals[lwrite]   = aworkB[l];
2532           cols[lwrite++] = ctmp;
2533         }
2534       }
2535       vals     += ncols;
2536       cols     += ncols;
2537       rptr[i+1] = rptr[i] + ncols;
2538       if (rownz_max < ncols) rownz_max = ncols;
2539     }
2540     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2541   } else { /* only copy matrix values into sbuf_a */
2542     rptr    = sbuf_j;
2543     vals    = sbuf_a;
2544     rptr[0] = 0;
2545     for (i=0; i<rend-rstart; i++) {
2546       row    = i + rstart;
2547       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2548       ncols  = nzA + nzB;
2549       cworkB = b->j + b->i[i];
2550       aworkA = a->a + a->i[i];
2551       aworkB = b->a + b->i[i];
2552       lwrite = 0;
2553       for (l=0; l<nzB; l++) {
2554         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2555       }
2556       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2557       for (l=0; l<nzB; l++) {
2558         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2559       }
2560       vals     += ncols;
2561       rptr[i+1] = rptr[i] + ncols;
2562     }
2563   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2564 
2565   /* send nzlocal to others, and recv other's nzlocal */
2566   /*--------------------------------------------------*/
2567   if (reuse == MAT_INITIAL_MATRIX) {
2568     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2569 
2570     s_waits2 = s_waits3 + nsends;
2571     s_waits1 = s_waits2 + nsends;
2572     r_waits1 = s_waits1 + nsends;
2573     r_waits2 = r_waits1 + nrecvs;
2574     r_waits3 = r_waits2 + nrecvs;
2575   } else {
2576     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2577 
2578     r_waits3 = s_waits3 + nsends;
2579   }
2580 
2581   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2582   if (reuse == MAT_INITIAL_MATRIX) {
2583     /* get new tags to keep the communication clean */
2584     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2585     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2586     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2587 
2588     /* post receives of other's nzlocal */
2589     for (i=0; i<nrecvs; i++) {
2590       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2591     }
2592     /* send nzlocal to others */
2593     for (i=0; i<nsends; i++) {
2594       sbuf_nz[i] = nzlocal;
2595       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2596     }
2597     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2598     count = nrecvs;
2599     while (count) {
2600       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2601 
2602       recv_rank[imdex] = recv_status.MPI_SOURCE;
2603       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2604       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2605 
2606       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2607 
2608       rbuf_nz[imdex] += i + 2;
2609 
2610       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2611       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2612       count--;
2613     }
2614     /* wait on sends of nzlocal */
2615     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2616     /* send mat->i,j to others, and recv from other's */
2617     /*------------------------------------------------*/
2618     for (i=0; i<nsends; i++) {
2619       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2620       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2621     }
2622     /* wait on receives of mat->i,j */
2623     /*------------------------------*/
2624     count = nrecvs;
2625     while (count) {
2626       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2627       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2628       count--;
2629     }
2630     /* wait on sends of mat->i,j */
2631     /*---------------------------*/
2632     if (nsends) {
2633       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2634     }
2635   } /* endof if (reuse == MAT_INITIAL_MATRIX) */
2636 
2637   /* post receives, send and receive mat->a */
2638   /*----------------------------------------*/
2639   for (imdex=0; imdex<nrecvs; imdex++) {
2640     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2641   }
2642   for (i=0; i<nsends; i++) {
2643     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2644   }
2645   count = nrecvs;
2646   while (count) {
2647     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2648     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2649     count--;
2650   }
2651   if (nsends) {
2652     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2653   }
2654 
2655   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2656 
2657   /* create redundant matrix */
2658   /*-------------------------*/
2659   if (reuse == MAT_INITIAL_MATRIX) {
2660     const PetscInt *range;
2661     PetscInt       rstart_sub,rend_sub,mloc_sub;
2662 
2663     /* compute rownz_max for preallocation */
2664     for (imdex=0; imdex<nrecvs; imdex++) {
2665       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2666       rptr = rbuf_j[imdex];
2667       for (i=0; i<j; i++) {
2668         ncols = rptr[i+1] - rptr[i];
2669         if (rownz_max < ncols) rownz_max = ncols;
2670       }
2671     }
2672 
2673     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2674 
2675     /* get local size of redundant matrix
2676        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2677     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2678     rstart_sub = range[nsubcomm*subrank];
2679     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2680       rend_sub = range[nsubcomm*(subrank+1)];
2681     } else {
2682       rend_sub = mat->rmap->N;
2683     }
2684     mloc_sub = rend_sub - rstart_sub;
2685 
2686     if (M == N) {
2687       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2688     } else { /* non-square matrix */
2689       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2690     }
2691     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2692     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2693     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2694     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2695   } else {
2696     C = *matredundant;
2697   }
2698 
2699   /* insert local matrix entries */
2700   rptr = sbuf_j;
2701   cols = sbuf_j + rend-rstart + 1;
2702   vals = sbuf_a;
2703   for (i=0; i<rend-rstart; i++) {
2704     row   = i + rstart;
2705     ncols = rptr[i+1] - rptr[i];
2706     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2707     vals += ncols;
2708     cols += ncols;
2709   }
2710   /* insert received matrix entries */
2711   for (imdex=0; imdex<nrecvs; imdex++) {
2712     rstart = rowrange[recv_rank[imdex]];
2713     rend   = rowrange[recv_rank[imdex]+1];
2714     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2715     rptr   = rbuf_j[imdex];
2716     cols   = rbuf_j[imdex] + rend-rstart + 1;
2717     vals   = rbuf_a[imdex];
2718     for (i=0; i<rend-rstart; i++) {
2719       row   = i + rstart;
2720       ncols = rptr[i+1] - rptr[i];
2721       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2722       vals += ncols;
2723       cols += ncols;
2724     }
2725   }
2726   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2727   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2728 
2729   if (reuse == MAT_INITIAL_MATRIX) {
2730     *matredundant = C;
2731 
2732     /* create a supporting struct and attach it to C for reuse */
2733     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2734     if (subsize == 1) {
2735       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2736       c->redundant = redund;
2737     } else {
2738       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2739       c->redundant = redund;
2740     }
2741 
2742     redund->nzlocal   = nzlocal;
2743     redund->nsends    = nsends;
2744     redund->nrecvs    = nrecvs;
2745     redund->send_rank = send_rank;
2746     redund->recv_rank = recv_rank;
2747     redund->sbuf_nz   = sbuf_nz;
2748     redund->rbuf_nz   = rbuf_nz;
2749     redund->sbuf_j    = sbuf_j;
2750     redund->sbuf_a    = sbuf_a;
2751     redund->rbuf_j    = rbuf_j;
2752     redund->rbuf_a    = rbuf_a;
2753     redund->psubcomm  = NULL;
2754   }
2755   PetscFunctionReturn(0);
2756 }
2757 
2758 #undef __FUNCT__
2759 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2760 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2761 {
2762   PetscErrorCode ierr;
2763   MPI_Comm       comm;
2764   PetscMPIInt    size,subsize;
2765   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2766   Mat_Redundant  *redund=NULL;
2767   PetscSubcomm   psubcomm=NULL;
2768   MPI_Comm       subcomm_in=subcomm;
2769   Mat            *matseq;
2770   IS             isrow,iscol;
2771 
2772   PetscFunctionBegin;
2773   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2774     if (reuse ==  MAT_INITIAL_MATRIX) {
2775       /* create psubcomm, then get subcomm */
2776       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2777       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2778       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2779 
2780       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2781       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2782       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2783       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2784       subcomm = psubcomm->comm;
2785     } else { /* retrieve psubcomm and subcomm */
2786       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2787       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2788       if (subsize == 1) {
2789         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2790         redund = c->redundant;
2791       } else {
2792         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2793         redund = c->redundant;
2794       }
2795       psubcomm = redund->psubcomm;
2796     }
2797     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2798       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2799       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2800         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2801         if (subsize == 1) {
2802           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2803           c->redundant->psubcomm = psubcomm;
2804         } else {
2805           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2806           c->redundant->psubcomm = psubcomm;
2807         }
2808       }
2809       PetscFunctionReturn(0);
2810     }
2811   }
2812 
2813   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2814   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2815   if (reuse == MAT_INITIAL_MATRIX) {
2816     /* create a local sequential matrix matseq[0] */
2817     mloc_sub = PETSC_DECIDE;
2818     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2819     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2820     rstart = rend - mloc_sub;
2821     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2822     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2823   } else { /* reuse == MAT_REUSE_MATRIX */
2824     if (subsize == 1) {
2825       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2826       redund = c->redundant;
2827     } else {
2828       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2829       redund = c->redundant;
2830     }
2831 
2832     isrow  = redund->isrow;
2833     iscol  = redund->iscol;
2834     matseq = redund->matseq;
2835   }
2836   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2837   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2838 
2839   if (reuse == MAT_INITIAL_MATRIX) {
2840     /* create a supporting struct and attach it to C for reuse */
2841     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2842     if (subsize == 1) {
2843       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2844       c->redundant = redund;
2845     } else {
2846       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2847       c->redundant = redund;
2848     }
2849     redund->isrow    = isrow;
2850     redund->iscol    = iscol;
2851     redund->matseq   = matseq;
2852     redund->psubcomm = psubcomm;
2853   }
2854   PetscFunctionReturn(0);
2855 }
2856 
2857 #undef __FUNCT__
2858 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2859 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2860 {
2861   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2862   PetscErrorCode ierr;
2863   PetscInt       i,*idxb = 0;
2864   PetscScalar    *va,*vb;
2865   Vec            vtmp;
2866 
2867   PetscFunctionBegin;
2868   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2869   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2870   if (idx) {
2871     for (i=0; i<A->rmap->n; i++) {
2872       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2873     }
2874   }
2875 
2876   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2877   if (idx) {
2878     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2879   }
2880   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2881   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2882 
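  /* keep, per row, whichever of the diagonal-block and off-diagonal-block maxima has the larger
     magnitude; garray maps an off-diagonal local column index back to its global column */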
2883   for (i=0; i<A->rmap->n; i++) {
2884     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2885       va[i] = vb[i];
2886       if (idx) idx[i] = a->garray[idxb[i]];
2887     }
2888   }
2889 
2890   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2892   ierr = PetscFree(idxb);CHKERRQ(ierr);
2893   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2899 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2900 {
2901   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2902   PetscErrorCode ierr;
2903   PetscInt       i,*idxb = 0;
2904   PetscScalar    *va,*vb;
2905   Vec            vtmp;
2906 
2907   PetscFunctionBegin;
2908   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2909   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2910   if (idx) {
2911     for (i=0; i<A->rmap->n; i++) {
2912       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2913     }
2914   }
2915 
2916   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2917   if (idx) {
2918     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2919   }
2920   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2921   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2922 
2923   for (i=0; i<A->rmap->n; i++) {
2924     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2925       va[i] = vb[i];
2926       if (idx) idx[i] = a->garray[idxb[i]];
2927     }
2928   }
2929 
2930   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2932   ierr = PetscFree(idxb);CHKERRQ(ierr);
2933   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2939 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2940 {
2941   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2942   PetscInt       n      = A->rmap->n;
2943   PetscInt       cstart = A->cmap->rstart;
2944   PetscInt       *cmap  = mat->garray;
2945   PetscInt       *diagIdx, *offdiagIdx;
2946   Vec            diagV, offdiagV;
2947   PetscScalar    *a, *diagA, *offdiagA;
2948   PetscInt       r;
2949   PetscErrorCode ierr;
2950 
2951   PetscFunctionBegin;
2952   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2953   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2954   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2955   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2956   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2957   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2958   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2959   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2960   for (r = 0; r < n; ++r) {
2961     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2962       a[r]   = diagA[r];
2963       idx[r] = cstart + diagIdx[r];
2964     } else {
2965       a[r]   = offdiagA[r];
2966       idx[r] = cmap[offdiagIdx[r]];
2967     }
2968   }
2969   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2970   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2971   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2972   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2973   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2974   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2975   PetscFunctionReturn(0);
2976 }
2977 
2978 #undef __FUNCT__
2979 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2980 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2981 {
2982   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2983   PetscInt       n      = A->rmap->n;
2984   PetscInt       cstart = A->cmap->rstart;
2985   PetscInt       *cmap  = mat->garray;
2986   PetscInt       *diagIdx, *offdiagIdx;
2987   Vec            diagV, offdiagV;
2988   PetscScalar    *a, *diagA, *offdiagA;
2989   PetscInt       r;
2990   PetscErrorCode ierr;
2991 
2992   PetscFunctionBegin;
2993   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2994   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2995   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2996   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2997   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2998   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2999   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3000   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3001   for (r = 0; r < n; ++r) {
3002     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3003       a[r]   = diagA[r];
3004       idx[r] = cstart + diagIdx[r];
3005     } else {
3006       a[r]   = offdiagA[r];
3007       idx[r] = cmap[offdiagIdx[r]];
3008     }
3009   }
3010   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3011   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3012   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3013   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3014   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3015   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 #undef __FUNCT__
3020 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3021 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3022 {
3023   PetscErrorCode ierr;
3024   Mat            *dummy;
3025 
3026   PetscFunctionBegin;
3027   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3028   *newmat = *dummy;
3029   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 
3033 #undef __FUNCT__
3034 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3035 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3036 {
3037   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3042   PetscFunctionReturn(0);
3043 }
3044 
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3047 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3048 {
3049   PetscErrorCode ierr;
3050   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3051 
3052   PetscFunctionBegin;
3053   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3054   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3055   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3056   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   PetscFunctionReturn(0);
3058 }
3059 
3060 /* -------------------------------------------------------------------*/
3061 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3062                                        MatGetRow_MPIAIJ,
3063                                        MatRestoreRow_MPIAIJ,
3064                                        MatMult_MPIAIJ,
3065                                 /* 4*/ MatMultAdd_MPIAIJ,
3066                                        MatMultTranspose_MPIAIJ,
3067                                        MatMultTransposeAdd_MPIAIJ,
3068 #if defined(PETSC_HAVE_PBGL)
3069                                        MatSolve_MPIAIJ,
3070 #else
3071                                        0,
3072 #endif
3073                                        0,
3074                                        0,
3075                                 /*10*/ 0,
3076                                        0,
3077                                        0,
3078                                        MatSOR_MPIAIJ,
3079                                        MatTranspose_MPIAIJ,
3080                                 /*15*/ MatGetInfo_MPIAIJ,
3081                                        MatEqual_MPIAIJ,
3082                                        MatGetDiagonal_MPIAIJ,
3083                                        MatDiagonalScale_MPIAIJ,
3084                                        MatNorm_MPIAIJ,
3085                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3086                                        MatAssemblyEnd_MPIAIJ,
3087                                        MatSetOption_MPIAIJ,
3088                                        MatZeroEntries_MPIAIJ,
3089                                 /*24*/ MatZeroRows_MPIAIJ,
3090                                        0,
3091 #if defined(PETSC_HAVE_PBGL)
3092                                        0,
3093 #else
3094                                        0,
3095 #endif
3096                                        0,
3097                                        0,
3098                                 /*29*/ MatSetUp_MPIAIJ,
3099 #if defined(PETSC_HAVE_PBGL)
3100                                        0,
3101 #else
3102                                        0,
3103 #endif
3104                                        0,
3105                                        0,
3106                                        0,
3107                                 /*34*/ MatDuplicate_MPIAIJ,
3108                                        0,
3109                                        0,
3110                                        0,
3111                                        0,
3112                                 /*39*/ MatAXPY_MPIAIJ,
3113                                        MatGetSubMatrices_MPIAIJ,
3114                                        MatIncreaseOverlap_MPIAIJ,
3115                                        MatGetValues_MPIAIJ,
3116                                        MatCopy_MPIAIJ,
3117                                 /*44*/ MatGetRowMax_MPIAIJ,
3118                                        MatScale_MPIAIJ,
3119                                        0,
3120                                        MatDiagonalSet_MPIAIJ,
3121                                        MatZeroRowsColumns_MPIAIJ,
3122                                 /*49*/ MatSetRandom_MPIAIJ,
3123                                        0,
3124                                        0,
3125                                        0,
3126                                        0,
3127                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3128                                        0,
3129                                        MatSetUnfactored_MPIAIJ,
3130                                        MatPermute_MPIAIJ,
3131                                        0,
3132                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3133                                        MatDestroy_MPIAIJ,
3134                                        MatView_MPIAIJ,
3135                                        0,
3136                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3137                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3138                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3139                                        0,
3140                                        0,
3141                                        0,
3142                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3143                                        MatGetRowMinAbs_MPIAIJ,
3144                                        0,
3145                                        MatSetColoring_MPIAIJ,
3146                                        0,
3147                                        MatSetValuesAdifor_MPIAIJ,
3148                                 /*75*/ MatFDColoringApply_AIJ,
3149                                        0,
3150                                        0,
3151                                        0,
3152                                        MatFindZeroDiagonals_MPIAIJ,
3153                                 /*80*/ 0,
3154                                        0,
3155                                        0,
3156                                 /*83*/ MatLoad_MPIAIJ,
3157                                        0,
3158                                        0,
3159                                        0,
3160                                        0,
3161                                        0,
3162                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3163                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3164                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3165                                        MatPtAP_MPIAIJ_MPIAIJ,
3166                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3167                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                        0,
3171                                        0,
3172                                 /*99*/ 0,
3173                                        0,
3174                                        0,
3175                                        MatConjugate_MPIAIJ,
3176                                        0,
3177                                 /*104*/MatSetValuesRow_MPIAIJ,
3178                                        MatRealPart_MPIAIJ,
3179                                        MatImaginaryPart_MPIAIJ,
3180                                        0,
3181                                        0,
3182                                 /*109*/0,
3183                                        MatGetRedundantMatrix_MPIAIJ,
3184                                        MatGetRowMin_MPIAIJ,
3185                                        0,
3186                                        0,
3187                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3188                                        0,
3189                                        0,
3190                                        0,
3191                                        0,
3192                                 /*119*/0,
3193                                        0,
3194                                        0,
3195                                        0,
3196                                        MatGetMultiProcBlock_MPIAIJ,
3197                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3198                                        MatGetColumnNorms_MPIAIJ,
3199                                        MatInvertBlockDiagonal_MPIAIJ,
3200                                        0,
3201                                        MatGetSubMatricesParallel_MPIAIJ,
3202                                 /*129*/0,
3203                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3204                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3205                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3206                                        0,
3207                                 /*134*/0,
3208                                        0,
3209                                        0,
3210                                        0,
3211                                        0,
3212                                 /*139*/0,
3213                                        0,
3214                                        0,
3215                                        MatFDColoringSetUp_MPIXAIJ
3216 };
3217 
3218 /* ----------------------------------------------------------------------------------------*/
3219 
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3222 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3223 {
3224   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3225   PetscErrorCode ierr;
3226 
3227   PetscFunctionBegin;
3228   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3229   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3235 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3236 {
3237   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3242   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3243   PetscFunctionReturn(0);
3244 }
3245 
3246 #undef __FUNCT__
3247 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3248 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3249 {
3250   Mat_MPIAIJ     *b;
3251   PetscErrorCode ierr;
3252 
3253   PetscFunctionBegin;
3254   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3255   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3256   b = (Mat_MPIAIJ*)B->data;
3257 
3258   if (!B->preallocated) {
3259     /* Explicitly create 2 MATSEQAIJ matrices. */
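    /* b->A holds the diagonal block (local rows x local columns); b->B holds the off-diagonal
       block, created here with the full global column width and compressed to the ghost columns
       (recorded in garray) during assembly */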
3260     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3261     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3262     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3263     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3264     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3265     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3266     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3267     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3268     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3269     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3270   }
3271 
3272   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3273   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3274   B->preallocated = PETSC_TRUE;
3275   PetscFunctionReturn(0);
3276 }
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3280 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3281 {
3282   Mat            mat;
3283   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3284   PetscErrorCode ierr;
3285 
3286   PetscFunctionBegin;
3287   *newmat = 0;
3288   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3289   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3290   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3291   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3292   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3293   a       = (Mat_MPIAIJ*)mat->data;
3294 
3295   mat->factortype   = matin->factortype;
3296   mat->assembled    = PETSC_TRUE;
3297   mat->insertmode   = NOT_SET_VALUES;
3298   mat->preallocated = PETSC_TRUE;
3299 
3300   a->size         = oldmat->size;
3301   a->rank         = oldmat->rank;
3302   a->donotstash   = oldmat->donotstash;
3303   a->roworiented  = oldmat->roworiented;
3304   a->rowindices   = 0;
3305   a->rowvalues    = 0;
3306   a->getrowactive = PETSC_FALSE;
3307 
3308   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3309   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3310 
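  /* deep-copy the parallel support data: the column map, the ghost-column list garray, the local
     work vector and scatter context, and the two sequential blocks A and B */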
3311   if (oldmat->colmap) {
3312 #if defined(PETSC_USE_CTABLE)
3313     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3314 #else
3315     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3316     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3317     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3318 #endif
3319   } else a->colmap = 0;
3320   if (oldmat->garray) {
3321     PetscInt len;
3322     len  = oldmat->B->cmap->n;
3323     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3324     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3325     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3326   } else a->garray = 0;
3327 
3328   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3329   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3330   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3331   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3332   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3333   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3334   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3335   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3336   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3337   *newmat = mat;
3338   PetscFunctionReturn(0);
3339 }
3340 
3341 
3342 
3343 #undef __FUNCT__
3344 #define __FUNCT__ "MatLoad_MPIAIJ"
3345 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3346 {
3347   PetscScalar    *vals,*svals;
3348   MPI_Comm       comm;
3349   PetscErrorCode ierr;
3350   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3351   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3352   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3353   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3354   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3355   int            fd;
3356   PetscInt       bs = 1;
3357 
3358   PetscFunctionBegin;
3359   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3360   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3361   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
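  /* PETSc binary matrix layout: header = {MAT_FILE_CLASSID, M, N, total nnz}, followed by the
     per-row nonzero counts, then all column indices, then all numerical values */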
3362   if (!rank) {
3363     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3364     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3365     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3366   }
3367 
3368   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3369   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3370   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3371 
3372   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3373 
3374   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3375   M    = header[1]; N = header[2];
3376   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3377   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3378   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3379 
3380   /* If global sizes are set, check if they are consistent with that given in the file */
3381   if (sizesset) {
3382     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3383   }
3384   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3385   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3386 
3387   /* determine ownership of all (block) rows */
3388   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3389   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3390   else m = newMat->rmap->n; /* Set by user */
3391 
3392   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3393   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3394 
3395   /* First process needs enough room for process with most rows */
3396   if (!rank) {
3397     mmax = rowners[1];
3398     for (i=2; i<=size; i++) {
3399       mmax = PetscMax(mmax, rowners[i]);
3400     }
3401   } else mmax = -1;             /* unused, but compilers complain */
3402 
3403   rowners[0] = 0;
3404   for (i=2; i<=size; i++) {
3405     rowners[i] += rowners[i-1];
3406   }
3407   rstart = rowners[rank];
3408   rend   = rowners[rank+1];
3409 
3410   /* distribute row lengths to all processors */
3411   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3412   if (!rank) {
3413     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3414     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3415     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3416     for (j=0; j<m; j++) {
3417       procsnz[0] += ourlens[j];
3418     }
3419     for (i=1; i<size; i++) {
3420       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3421       /* calculate the number of nonzeros on each processor */
3422       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3423         procsnz[i] += rowlengths[j];
3424       }
3425       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3426     }
3427     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3428   } else {
3429     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3430   }
3431 
3432   if (!rank) {
3433     /* determine max buffer needed and allocate it */
3434     maxnz = 0;
3435     for (i=0; i<size; i++) {
3436       maxnz = PetscMax(maxnz,procsnz[i]);
3437     }
3438     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3439 
3440     /* read in my part of the matrix column indices  */
3441     nz   = procsnz[0];
3442     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3443     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3444 
3445     /* read in everyone else's and ship off */
3446     for (i=1; i<size; i++) {
3447       nz   = procsnz[i];
3448       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3449       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3450     }
3451     ierr = PetscFree(cols);CHKERRQ(ierr);
3452   } else {
3453     /* determine buffer space needed for message */
3454     nz = 0;
3455     for (i=0; i<m; i++) {
3456       nz += ourlens[i];
3457     }
3458     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3459 
3460     /* receive message of column indices*/
3461     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3462   }
3463 
3464   /* determine column ownership if matrix is not square */
3465   if (N != M) {
3466     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3467     else n = newMat->cmap->n;
3468     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3469     cstart = cend - n;
3470   } else {
3471     cstart = rstart;
3472     cend   = rend;
3473     n      = cend - cstart;
3474   }
3475 
3476   /* loop over local rows, determining number of off diagonal entries */
3477   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3478   jj   = 0;
3479   for (i=0; i<m; i++) {
3480     for (j=0; j<ourlens[i]; j++) {
3481       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3482       jj++;
3483     }
3484   }
3485 
3486   for (i=0; i<m; i++) {
3487     ourlens[i] -= offlens[i];
3488   }
3489   if (!sizesset) {
3490     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3491   }
3492 
3493   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3494 
3495   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3496 
3497   for (i=0; i<m; i++) {
3498     ourlens[i] += offlens[i];
3499   }
3500 
3501   if (!rank) {
3502     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3503 
3504     /* read in my part of the matrix numerical values  */
3505     nz   = procsnz[0];
3506     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3507 
3508     /* insert into matrix */
3509     jj      = rstart;
3510     smycols = mycols;
3511     svals   = vals;
3512     for (i=0; i<m; i++) {
3513       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3514       smycols += ourlens[i];
3515       svals   += ourlens[i];
3516       jj++;
3517     }
3518 
3519     /* read in other processors and ship out */
3520     for (i=1; i<size; i++) {
3521       nz   = procsnz[i];
3522       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3523       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3524     }
3525     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3526   } else {
3527     /* receive numeric values */
3528     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3529 
3530     /* receive message of values*/
3531     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3532 
3533     /* insert into matrix */
3534     jj      = rstart;
3535     smycols = mycols;
3536     svals   = vals;
3537     for (i=0; i<m; i++) {
3538       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3539       smycols += ourlens[i];
3540       svals   += ourlens[i];
3541       jj++;
3542     }
3543   }
3544   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3545   ierr = PetscFree(vals);CHKERRQ(ierr);
3546   ierr = PetscFree(mycols);CHKERRQ(ierr);
3547   ierr = PetscFree(rowners);CHKERRQ(ierr);
3548   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3550   PetscFunctionReturn(0);
3551 }
3552 
3553 #undef __FUNCT__
3554 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3555 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3556 {
3557   PetscErrorCode ierr;
3558   IS             iscol_local;
3559   PetscInt       csize;
3560 
3561   PetscFunctionBegin;
3562   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3563   if (call == MAT_REUSE_MATRIX) {
3564     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3565     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3566   } else {
3567     PetscInt cbs;
3568     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3569     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3570     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3571   }
3572   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3573   if (call == MAT_INITIAL_MATRIX) {
3574     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3575     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3576   }
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3583 /*
3584     Not great since it makes two copies of the submatrix: first a SeqAIJ
3585   locally, and then the end result by concatenating the local matrices.
3586   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3587 
3588   Note: This requires a sequential iscol with all indices.
3589 */
3590 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3591 {
3592   PetscErrorCode ierr;
3593   PetscMPIInt    rank,size;
3594   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3595   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3596   PetscBool      allcolumns, colflag;
3597   Mat            M,Mreuse;
3598   MatScalar      *vwork,*aa;
3599   MPI_Comm       comm;
3600   Mat_SeqAIJ     *aij;
3601 
3602   PetscFunctionBegin;
3603   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3604   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3606 
3607   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3608   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3609   if (colflag && ncol == mat->cmap->N) {
3610     allcolumns = PETSC_TRUE;
3611   } else {
3612     allcolumns = PETSC_FALSE;
3613   }
3614   if (call ==  MAT_REUSE_MATRIX) {
3615     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3616     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3617     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3618   } else {
3619     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3620   }
3621 
3622   /*
3623       m - number of local rows
3624       n - number of columns (same on all processors)
3625       rstart - first row in new global matrix generated
3626   */
3627   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3628   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3629   if (call == MAT_INITIAL_MATRIX) {
3630     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3631     ii  = aij->i;
3632     jj  = aij->j;
3633 
3634     /*
3635         Determine the number of non-zeros in the diagonal and off-diagonal
3636         portions of the matrix in order to do correct preallocation
3637     */
3638 
3639     /* first get start and end of "diagonal" columns */
3640     if (csize == PETSC_DECIDE) {
3641       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3642       if (mglobal == n) { /* square matrix */
3643         nlocal = m;
3644       } else {
3645         nlocal = n/size + ((n % size) > rank);
3646       }
3647     } else {
3648       nlocal = csize;
3649     }
3650     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3651     rstart = rend - nlocal;
3652     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3653 
3654     /* next, compute all the lengths */
3655     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3656     olens = dlens + m;
3657     for (i=0; i<m; i++) {
3658       jend = ii[i+1] - ii[i];
3659       olen = 0;
3660       dlen = 0;
3661       for (j=0; j<jend; j++) {
3662         if (*jj < rstart || *jj >= rend) olen++;
3663         else dlen++;
3664         jj++;
3665       }
3666       olens[i] = olen;
3667       dlens[i] = dlen;
3668     }
3669     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3670     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3671     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3672     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3673     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3674     ierr = PetscFree(dlens);CHKERRQ(ierr);
3675   } else {
3676     PetscInt ml,nl;
3677 
3678     M    = *newmat;
3679     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3680     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3681     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3682     /*
3683          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3684        rather than the slower MatSetValues().
3685     */
3686     M->was_assembled = PETSC_TRUE;
3687     M->assembled     = PETSC_FALSE;
3688   }
3689   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3690   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3691   ii   = aij->i;
3692   jj   = aij->j;
3693   aa   = aij->a;
3694   for (i=0; i<m; i++) {
3695     row   = rstart + i;
3696     nz    = ii[i+1] - ii[i];
3697     cwork = jj;     jj += nz;
3698     vwork = aa;     aa += nz;
3699     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3700   }
3701 
3702   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3703   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3704   *newmat = M;
3705 
3706   /* save submatrix used in processor for next request */
3707   if (call ==  MAT_INITIAL_MATRIX) {
3708     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3709     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3710   }
3711   PetscFunctionReturn(0);
3712 }
3713 
3714 #undef __FUNCT__
3715 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3716 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3717 {
3718   PetscInt       m,cstart, cend,j,nnz,i,d;
3719   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3720   const PetscInt *JJ;
3721   PetscScalar    *values;
3722   PetscErrorCode ierr;
3723 
3724   PetscFunctionBegin;
3725   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3726 
3727   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3728   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3729   m      = B->rmap->n;
3730   cstart = B->cmap->rstart;
3731   cend   = B->cmap->rend;
3732   rstart = B->rmap->rstart;
3733 
3734   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3735 
3736 #if defined(PETSC_USE_DEBUG)
3737   for (i=0; i<m; i++) {
3738     nnz = Ii[i+1]- Ii[i];
3739     JJ  = J + Ii[i];
3740     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3741     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3742     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3743   }
3744 #endif
3745 
3746   for (i=0; i<m; i++) {
3747     nnz     = Ii[i+1]- Ii[i];
3748     JJ      = J + Ii[i];
3749     nnz_max = PetscMax(nnz_max,nnz);
3750     d       = 0;
3751     for (j=0; j<nnz; j++) {
3752       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3753     }
3754     d_nnz[i] = d;
3755     o_nnz[i] = nnz - d;
3756   }
3757   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3758   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3759 
3760   if (v) values = (PetscScalar*)v;
3761   else {
3762     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3763   }
3764 
3765   for (i=0; i<m; i++) {
3766     ii   = i + rstart;
3767     nnz  = Ii[i+1]- Ii[i];
3768     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3769   }
3770   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3771   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3772 
3773   if (!v) {
3774     ierr = PetscFree(values);CHKERRQ(ierr);
3775   }
3776   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3777   PetscFunctionReturn(0);
3778 }
3779 
3780 #undef __FUNCT__
3781 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3782 /*@
3783    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3784    (the default parallel PETSc format).
3785 
3786    Collective on MPI_Comm
3787 
3788    Input Parameters:
3789 +  B - the matrix
3790 .  i - the indices into j for the start of each local row (starts with zero)
3791 .  j - the column indices for each local row (starts with zero)
3792 -  v - optional values in the matrix
3793 
3794    Level: developer
3795 
3796    Notes:
3797        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3798      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3799      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3800 
3801        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3802 
3803        The format which is used for the sparse matrix input is equivalent to a
3804     row-major ordering, i.e., for the following matrix, the input data expected is
3805     as shown:
3806 
3807         1 0 0
3808         2 0 3     P0
3809        -------
3810         4 5 6     P1
3811 
3812      Process0 [P0]: rows_owned=[0,1]
3813         i =  {0,1,3}  [size = nrow+1  = 2+1]
3814         j =  {0,0,2}  [size = nz = 3]
3815         v =  {1,2,3}  [size = nz = 3]
3816 
3817      Process1 [P1]: rows_owned=[2]
3818         i =  {0,3}    [size = nrow+1  = 1+1]
3819         j =  {0,1,2}  [size = nz = 3]
3820         v =  {4,5,6}  [size = nz = 3]
3821 
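     As a sketch only (assuming B has already been created as a parallel AIJ matrix with its local
     sizes set to match the layout above), process 0 could supply its portion with:

.vb
     PetscInt    i[] = {0,1,3};        /* row starts for the two local rows */
     PetscInt    j[] = {0,0,2};        /* global column indices             */
     PetscScalar v[] = {1.0,2.0,3.0};  /* values matching j                 */
     ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve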
3822 .keywords: matrix, aij, compressed row, sparse, parallel
3823 
3824 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3825           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3826 @*/
3827 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3828 {
3829   PetscErrorCode ierr;
3830 
3831   PetscFunctionBegin;
3832   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3833   PetscFunctionReturn(0);
3834 }
3835 
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3838 /*@C
3839    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3840    (the default parallel PETSc format).  For good matrix assembly performance
3841    the user should preallocate the matrix storage by setting the parameters
3842    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3843    performance can be increased by more than a factor of 50.
3844 
3845    Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +  B - the matrix
3849 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3850            (same value is used for all local rows)
3851 .  d_nnz - array containing the number of nonzeros in the various rows of the
3852            DIAGONAL portion of the local submatrix (possibly different for each row)
3853            or NULL, if d_nz is used to specify the nonzero structure.
3854            The size of this array is equal to the number of local rows, i.e 'm'.
3855            For matrices that will be factored, you must leave room for (and set)
3856            the diagonal entry even if it is zero.
3857 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3858            submatrix (same value is used for all local rows).
3859 -  o_nnz - array containing the number of nonzeros in the various rows of the
3860            OFF-DIAGONAL portion of the local submatrix (possibly different for
3861            each row) or NULL, if o_nz is used to specify the nonzero
3862            structure. The size of this array is equal to the number
3863            of local rows, i.e 'm'.
3864 
3865    If the *_nnz parameter is given then the *_nz parameter is ignored
3866 
3867    The AIJ format (also called the Yale sparse matrix format or
3868    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3869    storage.  The stored row and column indices begin with zero.
3870    See Users-Manual: ch_mat for details.
3871 
3872    The parallel matrix is partitioned such that the first m0 rows belong to
3873    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3874    to process 2, etc., where m0,m1,m2,... are the input parameter 'm' on each process.
3875 
3876    The DIAGONAL portion of the local submatrix of a processor can be defined
3877    as the submatrix obtained by extracting the part corresponding to
3878    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3879    first row that belongs to the processor, r2 is the last row belonging to
3880    this processor, and c1-c2 is the range of indices of the local part of a
3881    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3882    common case of a square matrix, the row and column ranges are the same and
3883    the DIAGONAL part is also square. The remaining portion of the local
3884    submatrix (mx(N-n)) constitutes the OFF-DIAGONAL portion.
3885 
3886    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
3887 
3888    You can call MatGetInfo() to get information on how effective the preallocation was;
3889    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3890    You can also run with the option -info and look for messages with the string
3891    malloc in them to see if additional memory allocation was needed.
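
   For instance, a small sketch (assuming an already assembled matrix A) of querying these fields
   might be:

.vb
     MatInfo info;
     MatGetInfo(A,MAT_LOCAL,&info);
     PetscPrintf(PETSC_COMM_SELF,"mallocs %g, nz_allocated %g, nz_used %g, nz_unneeded %g\n",
                 info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve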
3892 
3893    Example usage:
3894 
3895    Consider the following 8x8 matrix with 34 non-zero values that is
3896    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
3897    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
3898    as follows:
3899 
3900 .vb
3901             1  2  0  |  0  3  0  |  0  4
3902     Proc0   0  5  6  |  7  0  0  |  8  0
3903             9  0 10  | 11  0  0  | 12  0
3904     -------------------------------------
3905            13  0 14  | 15 16 17  |  0  0
3906     Proc1   0 18  0  | 19 20 21  |  0  0
3907             0  0  0  | 22 23  0  | 24  0
3908     -------------------------------------
3909     Proc2  25 26 27  |  0  0 28  | 29  0
3910            30  0  0  | 31 32 33  |  0 34
3911 .ve
3912 
3913    This can be represented as a collection of submatrices as:
3914 
3915 .vb
3916       A B C
3917       D E F
3918       G H I
3919 .ve
3920 
3921    Where the submatrices A,B,C are owned by proc0, D,E,F are
3922    owned by proc1, G,H,I are owned by proc2.
3923 
3924    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3925    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3926    The 'M','N' parameters are 8,8, and have the same values on all procs.
3927 
3928    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3929    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3930    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3931    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3932    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
3933    matrix, and [DF] as another SeqAIJ matrix.
3934 
3935    When d_nz, o_nz parameters are specified, d_nz storage elements are
3936    allocated for every row of the local diagonal submatrix, and o_nz
3937    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3938    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
3939    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
3940    In this case, the values of d_nz,o_nz are:
3941 .vb
3942      proc0 : d_nz = 2, o_nz = 2
3943      proc1 : d_nz = 3, o_nz = 2
3944      proc2 : d_nz = 1, o_nz = 4
3945 .ve
3946    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3947    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3948    for proc2, i.e., we are using 12+15+10=37 storage locations to store
3949    34 values.
3950 
3951    When d_nnz, o_nnz parameters are specified, the storage is specified
3952    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3953    In the above case the values for d_nnz,o_nnz are:
3954 .vb
3955      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3956      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3957      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3958 .ve
3959    Here the space allocated is the sum of all the above values, i.e., 34, and
3960    hence the preallocation is perfect.
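
   For instance, a sketch (error checking omitted; the arrays are the per-row counts listed above
   for proc1) of making the call might be:

.vb
     PetscInt d_nnz[] = {3,3,2}, o_nnz[] = {2,1,1};
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
.ve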
3961 
3962    Level: intermediate
3963 
3964 .keywords: matrix, aij, compressed row, sparse, parallel
3965 
3966 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3967           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3968 @*/
3969 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3970 {
3971   PetscErrorCode ierr;
3972 
3973   PetscFunctionBegin;
3974   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3975   PetscValidType(B,1);
3976   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3977   PetscFunctionReturn(0);
3978 }
3979 
3980 #undef __FUNCT__
3981 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3982 /*@
3983      MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
3984          CSR format the local rows.
3985 
3986    Collective on MPI_Comm
3987 
3988    Input Parameters:
3989 +  comm - MPI communicator
3990 .  m - number of local rows (Cannot be PETSC_DECIDE)
3991 .  n - This value should be the same as the local size used in creating the
3992        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
3993        calculated if N is given). For square matrices n is almost always m.
3994 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3995 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3996 .   i - row indices
3997 .   j - column indices
3998 -   a - matrix values
3999 
4000    Output Parameter:
4001 .   mat - the matrix
4002 
4003    Level: intermediate
4004 
4005    Notes:
4006        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4007      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4008      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4009 
4010        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4011 
4012        The format used for the sparse matrix input is equivalent to a
4013     row-major ordering, i.e., for the following matrix, the input data expected is
4014     as shown:
4015 
4016         1 0 0
4017         2 0 3     P0
4018        -------
4019         4 5 6     P1
4020 
4021      Process0 [P0]: rows_owned=[0,1]
4022         i =  {0,1,3}  [size = nrow+1  = 2+1]
4023         j =  {0,0,2}  [size = nz = 3]
4024         v =  {1,2,3}  [size = nz = 3]
4025 
4026      Process1 [P1]: rows_owned=[2]
4027         i =  {0,3}    [size = nrow+1  = 1+1]
4028         j =  {0,1,2}  [size = nz = 3]
4029         v =  {4,5,6}  [size = nz = 3]
4030 
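     A minimal sketch (error checking omitted; nlocalrows and the i, j, and a arrays hold each
     process's portion as shown above) of the call itself might be:

.vb
      MatCreateMPIAIJWithArrays(comm,nlocalrows,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&mat);
.ve
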
4031 .keywords: matrix, aij, compressed row, sparse, parallel
4032 
4033 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4034           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4035 @*/
4036 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4037 {
4038   PetscErrorCode ierr;
4039 
4040   PetscFunctionBegin;
4041   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4042   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4043   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4044   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4045   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4046   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4047   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4048   PetscFunctionReturn(0);
4049 }
4050 
4051 #undef __FUNCT__
4052 #define __FUNCT__ "MatCreateAIJ"
4053 /*@C
4054    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4055    (the default parallel PETSc format).  For good matrix assembly performance
4056    the user should preallocate the matrix storage by setting the parameters
4057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4058    performance can be increased by more than a factor of 50.
4059 
4060    Collective on MPI_Comm
4061 
4062    Input Parameters:
4063 +  comm - MPI communicator
4064 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given).
4065            This value should be the same as the local size used in creating the
4066            y vector for the matrix-vector product y = Ax.
4067 .  n - This value should be the same as the local size used in creating the
4068        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4069        calculated if N is given). For square matrices n is almost always m.
4070 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4071 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4072 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4073            (same value is used for all local rows)
4074 .  d_nnz - array containing the number of nonzeros in the various rows of the
4075            DIAGONAL portion of the local submatrix (possibly different for each row)
4076            or NULL, if d_nz is used to specify the nonzero structure.
4077            The size of this array is equal to the number of local rows, i.e 'm'.
4078 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4079            submatrix (same value is used for all local rows).
4080 -  o_nnz - array containing the number of nonzeros in the various rows of the
4081            OFF-DIAGONAL portion of the local submatrix (possibly different for
4082            each row) or NULL, if o_nz is used to specify the nonzero
4083            structure. The size of this array is equal to the number
4084            of local rows, i.e 'm'.
4085 
4086    Output Parameter:
4087 .  A - the matrix
4088 
4089    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4090    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4091    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
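
   A sketch (error checking omitted; m, n, M, N and the preallocation arguments are placeholders)
   of that paradigm might be:

.vb
     MatCreate(comm,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATAIJ);
     MatSetFromOptions(A);
     MatSeqAIJSetPreallocation(A,d_nz,d_nnz);            /* takes effect on a single-process communicator */
     MatMPIAIJSetPreallocation(A,d_nz,d_nnz,o_nz,o_nnz); /* takes effect on a multi-process communicator  */
.ve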
4092 
4093    Notes:
4094    If the *_nnz parameter is given then the *_nz parameter is ignored
4095 
4096    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4097    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4098    storage requirements for this matrix.
4099 
4100    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4101    processor then it must be used on all processors that share the object for
4102    that argument.
4103 
4104    The user MUST specify either the local or global matrix dimensions
4105    (possibly both).
4106 
4107    The parallel matrix is partitioned across processors such that the
4108    first m0 rows belong to process 0, the next m1 rows belong to
4109    process 1, the next m2 rows belong to process 2, etc., where
4110    m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores
4111    values corresponding to an [m x N] submatrix.
4112 
4113    The columns are logically partitioned with the n0 columns belonging
4114    to the 0th partition, the next n1 columns belonging to the next
4115    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4116 
4117    The DIAGONAL portion of the local submatrix on any given processor
4118    is the submatrix formed by the rows and columns m,n
4119    owned by the given processor, i.e., the diagonal matrix on
4120    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4121    etc. The remaining portion of the local submatrix [m x (N-n)]
4122    constitutes the OFF-DIAGONAL portion. The example below better
4123    illustrates this concept.
4124 
4125    For a square global matrix we define each processor's diagonal portion
4126    to be its local rows and the corresponding columns (a square submatrix);
4127    each processor's off-diagonal portion encompasses the remainder of the
4128    local matrix (a rectangular submatrix).
4129 
4130    If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.
4131 
4132    When calling this routine with a single process communicator, a matrix of
4133    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4134    type of communicator, use the construction mechanism:
4135      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4136 
4137    By default, this format uses inodes (identical nodes) when possible.
4138    We search for consecutive rows with the same nonzero structure, thereby
4139    reusing matrix information to achieve increased efficiency.
4140 
4141    Options Database Keys:
4142 +  -mat_no_inode  - Do not use inodes
4143 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4144 -  -mat_aij_oneindex - Internally use indexing starting at 1
4145         rather than 0.  Note that when calling MatSetValues(),
4146         the user still MUST index entries starting at 0!
4147 
4148 
4149    Example usage:
4150 
4151    Consider the following 8x8 matrix with 34 non-zero values that is
4152    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4153    proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
4154    as follows:
4155 
4156 .vb
4157             1  2  0  |  0  3  0  |  0  4
4158     Proc0   0  5  6  |  7  0  0  |  8  0
4159             9  0 10  | 11  0  0  | 12  0
4160     -------------------------------------
4161            13  0 14  | 15 16 17  |  0  0
4162     Proc1   0 18  0  | 19 20 21  |  0  0
4163             0  0  0  | 22 23  0  | 24  0
4164     -------------------------------------
4165     Proc2  25 26 27  |  0  0 28  | 29  0
4166            30  0  0  | 31 32 33  |  0 34
4167 .ve
4168 
4169    This can be represented as a collection of submatrices as:
4170 
4171 .vb
4172       A B C
4173       D E F
4174       G H I
4175 .ve
4176 
4177    Where the submatrices A,B,C are owned by proc0, D,E,F are
4178    owned by proc1, G,H,I are owned by proc2.
4179 
4180    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4181    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4182    The 'M','N' parameters are 8,8, and have the same values on all procs.
4183 
4184    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4185    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4186    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4187    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4188    part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ
4189    matrix, and [DF] as another SeqAIJ matrix.
4190 
4191    When d_nz, o_nz parameters are specified, d_nz storage elements are
4192    allocated for every row of the local diagonal submatrix, and o_nz
4193    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4194    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4195    the local rows of the DIAGONAL and OFF-DIAGONAL submatrices, respectively.
4196    In this case, the values of d_nz,o_nz are:
4197 .vb
4198      proc0 : d_nz = 2, o_nz = 2
4199      proc1 : d_nz = 3, o_nz = 2
4200      proc2 : d_nz = 1, o_nz = 4
4201 .ve
4202    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4203    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4204    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4205    34 values.
4206 
4207    When d_nnz, o_nnz parameters are specified, the storage is specified
4208    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4209    In the above case the values for d_nnz,o_nnz are:
4210 .vb
4211      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4212      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4213      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4214 .ve
4215    Here the space allocated is the sum of all the above values, i.e., 34, and
4216    hence the preallocation is perfect.
4217 
4218    Level: intermediate
4219 
4220 .keywords: matrix, aij, compressed row, sparse, parallel
4221 
4222 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4223           MPIAIJ, MatCreateMPIAIJWithArrays()
4224 @*/
4225 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4226 {
4227   PetscErrorCode ierr;
4228   PetscMPIInt    size;
4229 
4230   PetscFunctionBegin;
4231   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4232   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4233   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4234   if (size > 1) {
4235     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4236     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4237   } else {
4238     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4239     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4240   }
4241   PetscFunctionReturn(0);
4242 }
4243 
4244 #undef __FUNCT__
4245 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
4246 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4247 {
4248   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4249 
4250   PetscFunctionBegin;
4251   if (Ad)     *Ad     = a->A;
4252   if (Ao)     *Ao     = a->B;
4253   if (colmap) *colmap = a->garray;
4254   PetscFunctionReturn(0);
4255 }
4256 
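/*
   A usage sketch (names illustrative) for MatMPIAIJGetSeqAIJ(): retrieve the diagonal and
   off-diagonal blocks of an MPIAIJ matrix A, plus the local-to-global map for the columns of
   the off-diagonal block. The returned matrices are internal to A and must not be destroyed
   by the caller.

     Mat            Ad,Ao;
     const PetscInt *colmap;
     ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/
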
4257 #undef __FUNCT__
4258 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4259 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4260 {
4261   PetscErrorCode ierr;
4262   PetscInt       i;
4263   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4264 
4265   PetscFunctionBegin;
4266   if (coloring->ctype == IS_COLORING_GLOBAL) {
4267     ISColoringValue *allcolors,*colors;
4268     ISColoring      ocoloring;
4269 
4270     /* set coloring for diagonal portion */
4271     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4272 
4273     /* set coloring for off-diagonal portion */
4274     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4275     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4276     for (i=0; i<a->B->cmap->n; i++) {
4277       colors[i] = allcolors[a->garray[i]];
4278     }
4279     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4280     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4281     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4282     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4283   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4284     ISColoringValue *colors;
4285     PetscInt        *larray;
4286     ISColoring      ocoloring;
4287 
4288     /* set coloring for diagonal portion */
4289     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4290     for (i=0; i<a->A->cmap->n; i++) {
4291       larray[i] = i + A->cmap->rstart;
4292     }
4293     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4294     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4295     for (i=0; i<a->A->cmap->n; i++) {
4296       colors[i] = coloring->colors[larray[i]];
4297     }
4298     ierr = PetscFree(larray);CHKERRQ(ierr);
4299     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4300     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4301     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4302 
4303     /* set coloring for off-diagonal portion */
4304     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4305     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4306     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4307     for (i=0; i<a->B->cmap->n; i++) {
4308       colors[i] = coloring->colors[larray[i]];
4309     }
4310     ierr = PetscFree(larray);CHKERRQ(ierr);
4311     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4312     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4313     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4314   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4315   PetscFunctionReturn(0);
4316 }
4317 
4318 #undef __FUNCT__
4319 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4320 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4321 {
4322   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4323   PetscErrorCode ierr;
4324 
4325   PetscFunctionBegin;
4326   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4327   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4328   PetscFunctionReturn(0);
4329 }
4330 
4331 #undef __FUNCT__
4332 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4333 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4334 {
4335   PetscErrorCode ierr;
4336   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4337   PetscInt       *indx;
4338 
4339   PetscFunctionBegin;
4340   /* This routine will ONLY return MPIAIJ type matrix */
4341   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4342   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4343   if (n == PETSC_DECIDE) {
4344     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4345   }
4346   /* Check sum(n) = N */
4347   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4348   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %D",N);
4349 
4350   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4351   rstart -= m;
4352 
4353   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4354   for (i=0; i<m; i++) {
4355     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4356     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4357     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4358   }
4359 
4360   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4361   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4362   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4363   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4364   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4365   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4366   PetscFunctionReturn(0);
4367 }
4368 
4369 #undef __FUNCT__
4370 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4371 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4372 {
4373   PetscErrorCode ierr;
4374   PetscInt       m,N,i,rstart,nnz,Ii;
4375   PetscInt       *indx;
4376   PetscScalar    *values;
4377 
4378   PetscFunctionBegin;
4379   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4380   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4381   for (i=0; i<m; i++) {
4382     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4383     Ii   = i + rstart;
4384     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4385     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4386   }
4387   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4388   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4389   PetscFunctionReturn(0);
4390 }
4391 
4392 #undef __FUNCT__
4393 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4394 /*@
4395       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4396                  matrices from each processor
4397 
4398     Collective on MPI_Comm
4399 
4400    Input Parameters:
4401 +    comm - the communicator the parallel matrix will live on
4402 .    inmat - the input sequential matrix (one per process)
4403 .    n - number of local columns (or PETSC_DECIDE)
4404 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4405 
4406    Output Parameter:
4407 .    outmat - the parallel matrix generated
4408 
4409     Level: advanced
4410 
4411    Notes: The number of columns of the matrix in EACH processor MUST be the same.
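
   For instance, a sketch (error checking omitted; Aseq is each process's sequential matrix) might be:

.vb
     MatCreateMPIAIJConcatenateSeqAIJ(comm,Aseq,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);
.ve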
4412 
4413 @*/
4414 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4415 {
4416   PetscErrorCode ierr;
4417   PetscMPIInt    size;
4418 
4419   PetscFunctionBegin;
4420   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4421   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4422   if (size == 1) {
4423     if (scall == MAT_INITIAL_MATRIX) {
4424       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4425     } else {
4426       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4427     }
4428   } else {
4429     if (scall == MAT_INITIAL_MATRIX) {
4430       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4431     }
4432     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4433   }
4434   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4435   PetscFunctionReturn(0);
4436 }
4437 
4438 #undef __FUNCT__
4439 #define __FUNCT__ "MatFileSplit"
4440 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4441 {
4442   PetscErrorCode    ierr;
4443   PetscMPIInt       rank;
4444   PetscInt          m,N,i,rstart,nnz;
4445   size_t            len;
4446   const PetscInt    *indx;
4447   PetscViewer       out;
4448   char              *name;
4449   Mat               B;
4450   const PetscScalar *values;
4451 
4452   PetscFunctionBegin;
4453   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4454   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4455   /* Should this be the type of the diagonal block of A? */
4456   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4457   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4458   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4459   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4460   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4461   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4462   for (i=0; i<m; i++) {
4463     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4464     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4465     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4466   }
4467   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4468   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4469 
4470   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4471   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4472   ierr = PetscMalloc1((len+5),&name);CHKERRQ(ierr);
4473   sprintf(name,"%s.%d",outfile,rank);
4474   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4475   ierr = PetscFree(name);CHKERRQ(ierr);
4476   ierr = MatView(B,out);CHKERRQ(ierr);
4477   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4478   ierr = MatDestroy(&B);CHKERRQ(ierr);
4479   PetscFunctionReturn(0);
4480 }
4481 
4482 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4483 #undef __FUNCT__
4484 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
4485 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4486 {
4487   PetscErrorCode      ierr;
4488   Mat_Merge_SeqsToMPI *merge;
4489   PetscContainer      container;
4490 
4491   PetscFunctionBegin;
4492   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4493   if (container) {
4494     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4495     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4496     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4497     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4498     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4499     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4500     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4501     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4502     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4503     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4504     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4505     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4507     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4508     ierr = PetscFree(merge);CHKERRQ(ierr);
4509     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4510   }
4511   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4512   PetscFunctionReturn(0);
4513 }
4514 
4515 #include <../src/mat/utils/freespace.h>
4516 #include <petscbt.h>
4517 
4518 #undef __FUNCT__
4519 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4520 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4521 {
4522   PetscErrorCode      ierr;
4523   MPI_Comm            comm;
4524   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4525   PetscMPIInt         size,rank,taga,*len_s;
4526   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4527   PetscInt            proc,m;
4528   PetscInt            **buf_ri,**buf_rj;
4529   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4530   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4531   MPI_Request         *s_waits,*r_waits;
4532   MPI_Status          *status;
4533   MatScalar           *aa=a->a;
4534   MatScalar           **abuf_r,*ba_i;
4535   Mat_Merge_SeqsToMPI *merge;
4536   PetscContainer      container;
4537 
4538   PetscFunctionBegin;
4539   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4540   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4541 
4542   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4543   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4544 
4545   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4546   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4547 
4548   bi     = merge->bi;
4549   bj     = merge->bj;
4550   buf_ri = merge->buf_ri;
4551   buf_rj = merge->buf_rj;
4552 
4553   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4554   owners = merge->rowmap->range;
4555   len_s  = merge->len_s;
4556 
4557   /* send and recv matrix values */
4558   /*-----------------------------*/
4559   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4560   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4561 
4562   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4563   for (proc=0,k=0; proc<size; proc++) {
4564     if (!len_s[proc]) continue;
4565     i    = owners[proc];
4566     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4567     k++;
4568   }
4569 
4570   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4571   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4572   ierr = PetscFree(status);CHKERRQ(ierr);
4573 
4574   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4575   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4576 
4577   /* insert mat values of mpimat */
4578   /*----------------------------*/
4579   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4580   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4581 
4582   for (k=0; k<merge->nrecv; k++) {
4583     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4584     nrows       = *(buf_ri_k[k]);
4585     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4586     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4587   }
4588 
4589   /* set values of ba */
4590   m = merge->rowmap->n;
4591   for (i=0; i<m; i++) {
4592     arow = owners[rank] + i;
4593     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4594     bnzi = bi[i+1] - bi[i];
4595     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4596 
4597     /* add local non-zero vals of this proc's seqmat into ba */
4598     anzi   = ai[arow+1] - ai[arow];
4599     aj     = a->j + ai[arow];
4600     aa     = a->a + ai[arow];
4601     nextaj = 0;
4602     for (j=0; nextaj<anzi; j++) {
4603       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4604         ba_i[j] += aa[nextaj++];
4605       }
4606     }
4607 
4608     /* add received vals into ba */
4609     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4610       /* i-th row */
4611       if (i == *nextrow[k]) {
4612         anzi   = *(nextai[k]+1) - *nextai[k];
4613         aj     = buf_rj[k] + *(nextai[k]);
4614         aa     = abuf_r[k] + *(nextai[k]);
4615         nextaj = 0;
4616         for (j=0; nextaj<anzi; j++) {
4617           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4618             ba_i[j] += aa[nextaj++];
4619           }
4620         }
4621         nextrow[k]++; nextai[k]++;
4622       }
4623     }
4624     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4625   }
4626   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4627   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4628 
4629   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4630   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4631   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4632   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4633   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4634   PetscFunctionReturn(0);
4635 }
4636 
4637 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4638 
4639 #undef __FUNCT__
4640 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4641 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4642 {
4643   PetscErrorCode      ierr;
4644   Mat                 B_mpi;
4645   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4646   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4647   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4648   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4649   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4650   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4651   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4652   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4653   MPI_Status          *status;
4654   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4655   PetscBT             lnkbt;
4656   Mat_Merge_SeqsToMPI *merge;
4657   PetscContainer      container;
4658 
4659   PetscFunctionBegin;
4660   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4661 
4662   /* make sure it is a PETSc comm */
4663   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4664   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4665   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4666 
4667   ierr = PetscNew(&merge);CHKERRQ(ierr);
4668   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4669 
4670   /* determine row ownership */
4671   /*---------------------------------------------------------*/
4672   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4673   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4674   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4675   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4676   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4677   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4678   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4679 
4680   m      = merge->rowmap->n;
4681   owners = merge->rowmap->range;
4682 
4683   /* determine the number of messages to send, their lengths */
4684   /*---------------------------------------------------------*/
4685   len_s = merge->len_s;
4686 
4687   len          = 0; /* length of buf_si[] */
4688   merge->nsend = 0;
4689   for (proc=0; proc<size; proc++) {
4690     len_si[proc] = 0;
4691     if (proc == rank) {
4692       len_s[proc] = 0;
4693     } else {
4694       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4695       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4696     }
4697     if (len_s[proc]) {
4698       merge->nsend++;
4699       nrows = 0;
4700       for (i=owners[proc]; i<owners[proc+1]; i++) {
4701         if (ai[i+1] > ai[i]) nrows++;
4702       }
4703       len_si[proc] = 2*(nrows+1);
4704       len         += len_si[proc];
4705     }
4706   }
4707 
4708   /* determine the number and length of messages to receive for ij-structure */
4709   /*-------------------------------------------------------------------------*/
4710   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4711   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4712 
4713   /* post the Irecv of j-structure */
4714   /*-------------------------------*/
4715   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4716   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4717 
4718   /* post the Isend of j-structure */
4719   /*--------------------------------*/
4720   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4721 
4722   for (proc=0, k=0; proc<size; proc++) {
4723     if (!len_s[proc]) continue;
4724     i    = owners[proc];
4725     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4726     k++;
4727   }
4728 
4729   /* receives and sends of j-structure are complete */
4730   /*------------------------------------------------*/
4731   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4732   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4733 
4734   /* send and recv i-structure */
4735   /*---------------------------*/
4736   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4737   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4738 
4739   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4740   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4741   for (proc=0,k=0; proc<size; proc++) {
4742     if (!len_s[proc]) continue;
4743     /* form outgoing message for i-structure:
4744          buf_si[0]:                 nrows to be sent
4745                [1:nrows]:           row index (global)
4746                [nrows+1:2*nrows+1]: i-structure index
4747     */
4748     /*-------------------------------------------*/
4749     nrows       = len_si[proc]/2 - 1;
4750     buf_si_i    = buf_si + nrows+1;
4751     buf_si[0]   = nrows;
4752     buf_si_i[0] = 0;
4753     nrows       = 0;
4754     for (i=owners[proc]; i<owners[proc+1]; i++) {
4755       anzi = ai[i+1] - ai[i];
4756       if (anzi) {
4757         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4758         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4759         nrows++;
4760       }
4761     }
4762     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4763     k++;
4764     buf_si += len_si[proc];
4765   }
4766 
4767   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4768   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4769 
4770   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4771   for (i=0; i<merge->nrecv; i++) {
4772     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4773   }
4774 
4775   ierr = PetscFree(len_si);CHKERRQ(ierr);
4776   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4777   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4778   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4779   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4780   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4781   ierr = PetscFree(status);CHKERRQ(ierr);
4782 
4783   /* compute a local seq matrix in each processor */
4784   /*----------------------------------------------*/
4785   /* allocate bi array and free space for accumulating nonzero column info */
4786   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4787   bi[0] = 0;
4788 
4789   /* create and initialize a linked list */
4790   nlnk = N+1;
4791   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4792 
4793   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4794   len  = ai[owners[rank+1]] - ai[owners[rank]];
4795   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4796 
4797   current_space = free_space;
4798 
4799   /* determine symbolic info for each local row */
4800   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4801 
4802   for (k=0; k<merge->nrecv; k++) {
4803     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4804     nrows       = *buf_ri_k[k];
4805     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4806     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4807   }
4808 
4809   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4810   len  = 0;
4811   for (i=0; i<m; i++) {
4812     bnzi = 0;
4813     /* add local non-zero cols of this proc's seqmat into lnk */
4814     arow  = owners[rank] + i;
4815     anzi  = ai[arow+1] - ai[arow];
4816     aj    = a->j + ai[arow];
4817     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4818     bnzi += nlnk;
4819     /* add received col data into lnk */
4820     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4821       if (i == *nextrow[k]) { /* i-th row */
4822         anzi  = *(nextai[k]+1) - *nextai[k];
4823         aj    = buf_rj[k] + *nextai[k];
4824         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4825         bnzi += nlnk;
4826         nextrow[k]++; nextai[k]++;
4827       }
4828     }
4829     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4830 
4831     /* if free space is not available, make more free space */
4832     if (current_space->local_remaining<bnzi) {
4833       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4834       nspacedouble++;
4835     }
4836     /* copy data into free space, then initialize lnk */
4837     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4838     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4839 
4840     current_space->array           += bnzi;
4841     current_space->local_used      += bnzi;
4842     current_space->local_remaining -= bnzi;
4843 
4844     bi[i+1] = bi[i] + bnzi;
4845   }
4846 
4847   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4848 
4849   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4850   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4851   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4852 
4853   /* create symbolic parallel matrix B_mpi */
4854   /*---------------------------------------*/
4855   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4856   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4857   if (n==PETSC_DECIDE) {
4858     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4859   } else {
4860     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4861   }
4862   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4863   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4864   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4865   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4866   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4867 
4868   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4869   B_mpi->assembled    = PETSC_FALSE;
4870   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4871   merge->bi           = bi;
4872   merge->bj           = bj;
4873   merge->buf_ri       = buf_ri;
4874   merge->buf_rj       = buf_rj;
4875   merge->coi          = NULL;
4876   merge->coj          = NULL;
4877   merge->owners_co    = NULL;
4878 
4879   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4880 
4881   /* attach the supporting struct to B_mpi for reuse */
4882   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4883   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4884   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4885   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4886   *mpimat = B_mpi;
4887 
4888   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4889   PetscFunctionReturn(0);
4890 }
4891 
4892 #undef __FUNCT__
4893 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4894 /*@C
4895       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4896                  matrices from each processor
4897 
4898     Collective on MPI_Comm
4899 
4900    Input Parameters:
4901 +    comm - the communicator the parallel matrix will live on
4902 .    seqmat - the input sequential matrix (one per process)
4903 .    m - number of local rows (or PETSC_DECIDE)
4904 .    n - number of local columns (or PETSC_DECIDE)
4905 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4906 
4907    Output Parameter:
4908 .    mpimat - the parallel matrix generated
4909 
4910     Level: advanced
4911 
4912    Notes:
4913      The dimensions of the sequential matrix in each processor MUST be the same.
4914      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4915      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
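
   For instance, a sketch (error checking omitted; Aseq is each process's sequential matrix, all of
   the same dimensions) might be:

.vb
     MatCreateMPIAIJSumSeqAIJ(comm,Aseq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Ampi);
.ve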
4916 @*/
4917 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4918 {
4919   PetscErrorCode ierr;
4920   PetscMPIInt    size;
4921 
4922   PetscFunctionBegin;
4923   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4924   if (size == 1) {
4925     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4926     if (scall == MAT_INITIAL_MATRIX) {
4927       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4928     } else {
4929       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4930     }
4931     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4932     PetscFunctionReturn(0);
4933   }
4934   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4935   if (scall == MAT_INITIAL_MATRIX) {
4936     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4937   }
4938   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4939   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4940   PetscFunctionReturn(0);
4941 }
4942 
4943 #undef __FUNCT__
4944 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4945 /*@
4946      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4947           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4948           with MatGetSize()
4949 
4950     Not Collective
4951 
4952    Input Parameters:
4953 +    A - the matrix
4954 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4955 
4956    Output Parameter:
4957 .    A_loc - the local sequential matrix generated
4958 
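   A sketch (error checking omitted) of extracting the local matrix and refreshing its values after
   A changes might be:

.vb
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     /* ... modify and reassemble A ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
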
4959     Level: developer
4960 
4961 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4962 
4963 @*/
4964 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4965 {
4966   PetscErrorCode ierr;
4967   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4968   Mat_SeqAIJ     *mat,*a,*b;
4969   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4970   MatScalar      *aa,*ba,*cam;
4971   PetscScalar    *ca;
4972   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4973   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4974   PetscBool      match;
4975 
4976   PetscFunctionBegin;
4977   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4978   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4979   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4980   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4981   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4982   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4983   aa = a->a; ba = b->a;
4984   if (scall == MAT_INITIAL_MATRIX) {
4985     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4986     ci[0] = 0;
4987     for (i=0; i<am; i++) {
4988       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
4989     }
4990     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
4991     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
4992     k    = 0;
4993     for (i=0; i<am; i++) {
4994       ncols_o = bi[i+1] - bi[i];
4995       ncols_d = ai[i+1] - ai[i];
4996       /* off-diagonal portion of A */
4997       for (jo=0; jo<ncols_o; jo++) {
4998         col = cmap[*bj];
4999         if (col >= cstart) break;
5000         cj[k]   = col; bj++;
5001         ca[k++] = *ba++;
5002       }
5003       /* diagonal portion of A */
5004       for (j=0; j<ncols_d; j++) {
5005         cj[k]   = cstart + *aj++;
5006         ca[k++] = *aa++;
5007       }
5008       /* off-diagonal portion of A */
5009       for (j=jo; j<ncols_o; j++) {
5010         cj[k]   = cmap[*bj++];
5011         ca[k++] = *ba++;
5012       }
5013     }
5014     /* put together the new matrix */
5015     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5016     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5017     /* Since these are PETSc arrays, change flags to free them as necessary. */
5018     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5019     mat->free_a  = PETSC_TRUE;
5020     mat->free_ij = PETSC_TRUE;
5021     mat->nonew   = 0;
5022   } else if (scall == MAT_REUSE_MATRIX) {
5023     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5024     ci = mat->i; cj = mat->j; cam = mat->a;
5025     for (i=0; i<am; i++) {
5026       /* off-diagonal portion of A */
5027       ncols_o = bi[i+1] - bi[i];
5028       for (jo=0; jo<ncols_o; jo++) {
5029         col = cmap[*bj];
5030         if (col >= cstart) break;
5031         *cam++ = *ba++; bj++;
5032       }
5033       /* diagonal portion of A */
5034       ncols_d = ai[i+1] - ai[i];
5035       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5036       /* off-diagonal portion of A */
5037       for (j=jo; j<ncols_o; j++) {
5038         *cam++ = *ba++; bj++;
5039       }
5040     }
5041   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5042   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5043   PetscFunctionReturn(0);
5044 }
5045 
5046 #undef __FUNCT__
5047 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5048 /*@C
5049      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5050 
5051     Not Collective
5052 
5053    Input Parameters:
5054 +    A - the matrix
5055 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5056 -    row, col - index sets of rows and columns to extract (or NULL)
5057 
5058    Output Parameter:
5059 .    A_loc - the local sequential matrix generated
5060 
5061     Level: developer
5062 
5063 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5064 
5065 @*/
5066 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5067 {
5068   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5069   PetscErrorCode ierr;
5070   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5071   IS             isrowa,iscola;
5072   Mat            *aloc;
5073   PetscBool      match;
5074 
5075   PetscFunctionBegin;
5076   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5077   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5078   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5079   if (!row) {
5080     start = A->rmap->rstart; end = A->rmap->rend;
5081     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5082   } else {
5083     isrowa = *row;
5084   }
5085   if (!col) {
5086     start = A->cmap->rstart;
5087     cmap  = a->garray;
5088     nzA   = a->A->cmap->n;
5089     nzB   = a->B->cmap->n;
5090     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5091     ncols = 0;
5092     for (i=0; i<nzB; i++) {
5093       if (cmap[i] < start) idx[ncols++] = cmap[i];
5094       else break;
5095     }
5096     imark = i;
5097     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5098     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5099     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5100   } else {
5101     iscola = *col;
5102   }
5103   if (scall != MAT_INITIAL_MATRIX) {
5104     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5105     aloc[0] = *A_loc;
5106   }
5107   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5108   *A_loc = aloc[0];
5109   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5110   if (!row) {
5111     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5112   }
5113   if (!col) {
5114     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5115   }
5116   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5117   PetscFunctionReturn(0);
5118 }
5119 
5120 #undef __FUNCT__
5121 #define __FUNCT__ "MatGetBrowsOfAcols"
5122 /*@C
5123     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5124 
5125     Collective on Mat
5126 
5127    Input Parameters:
5128 +    A,B - the matrices in mpiaij format
5129 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5130 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5131 
5132    Output Parameter:
5133 +    rowb, colb - index sets of rows and columns of B to extract
5134 -    B_seq - the sequential matrix generated
5135 
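    Example usage (a minimal sketch; assumes A and B are compatible MPIAIJ matrices):
.vb
      IS  rowb = NULL,colb = NULL;
      Mat B_seq;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ...  reuse the saved index sets when only the numerical values of B have changed ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
.ve
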
5136     Level: developer
5137 
5138 @*/
5139 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5140 {
5141   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5142   PetscErrorCode ierr;
5143   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5144   IS             isrowb,iscolb;
5145   Mat            *bseq=NULL;
5146 
5147   PetscFunctionBegin;
5148   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5149     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5150   }
5151   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5152 
5153   if (scall == MAT_INITIAL_MATRIX) {
5154     start = A->cmap->rstart;
5155     cmap  = a->garray;
5156     nzA   = a->A->cmap->n;
5157     nzB   = a->B->cmap->n;
5158     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5159     ncols = 0;
5160     for (i=0; i<nzB; i++) {  /* rows of B below the local row range */
5161       if (cmap[i] < start) idx[ncols++] = cmap[i];
5162       else break;
5163     }
5164     imark = i;
5165     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* locally owned rows of B */
5166     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* rows of B above the local row range */
5167     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5168     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5169   } else {
5170     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5171     isrowb  = *rowb; iscolb = *colb;
5172     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5173     bseq[0] = *B_seq;
5174   }
5175   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5176   *B_seq = bseq[0];
5177   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5178   if (!rowb) {
5179     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5180   } else {
5181     *rowb = isrowb;
5182   }
5183   if (!colb) {
5184     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5185   } else {
5186     *colb = iscolb;
5187   }
5188   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5189   PetscFunctionReturn(0);
5190 }
5191 
5192 #undef __FUNCT__
5193 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5194 /*
5195     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5196     of the OFF-DIAGONAL portion of local A
5197 
5198     Collective on Mat
5199 
5200    Input Parameters:
5201 +    A,B - the matrices in mpiaij format
5202 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5203 
5204    Output Parameter:
5205 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5206 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE_MATRIX (or NULL)
5207 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE_MATRIX (or NULL)
5208 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5209 
5210     Level: developer
5211 
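    Example usage (a minimal sketch, not taken from an actual caller; assumes A and B are
    compatible MPIAIJ matrices and the saved arrays are kept alive between the two calls):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat       B_oth;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ...  later, when B has new numerical values but the same nonzero pattern ...
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
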
5212 */
5213 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5214 {
5215   VecScatter_MPI_General *gen_to,*gen_from;
5216   PetscErrorCode         ierr;
5217   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5218   Mat_SeqAIJ             *b_oth;
5219   VecScatter             ctx =a->Mvctx;
5220   MPI_Comm               comm;
5221   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5222   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5223   PetscScalar            *rvalues,*svalues;
5224   MatScalar              *b_otha,*bufa,*bufA;
5225   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5226   MPI_Request            *rwaits = NULL,*swaits = NULL;
5227   MPI_Status             *sstatus,rstatus;
5228   PetscMPIInt            jj;
5229   PetscInt               *cols,sbs,rbs;
5230   PetscScalar            *vals;
5231 
5232   PetscFunctionBegin;
5233   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5234   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5235     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5236   }
5237   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5238   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5239 
5240   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5241   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5242   rvalues  = gen_from->values; /* reused to hold the lengths of the rows to be received */
5243   svalues  = gen_to->values;   /* reused to hold the lengths of the rows to be sent */
5244   nrecvs   = gen_from->n;
5245   nsends   = gen_to->n;
5246 
5247   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5248   srow    = gen_to->indices;    /* local row index to be sent */
5249   sstarts = gen_to->starts;
5250   sprocs  = gen_to->procs;
5251   sstatus = gen_to->sstatus;
5252   sbs     = gen_to->bs;
5253   rstarts = gen_from->starts;
5254   rprocs  = gen_from->procs;
5255   rbs     = gen_from->bs;
5256 
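  /* The exchange proceeds in three phases along the communication pattern of a->Mvctx:
     first the row lengths (i-array), then the column indices (j-array), and finally the
     numerical values (a-array) of the rows of B needed for the off-diagonal columns of A. */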
5257   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5258   if (scall == MAT_INITIAL_MATRIX) {
5259     /* i-array */
5260     /*---------*/
5261     /*  post receives */
5262     for (i=0; i<nrecvs; i++) {
5263       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5264       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of row lengths to be received */
5265       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5266     }
5267 
5268     /* pack the outgoing message */
5269     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5270 
5271     sstartsj[0] = 0;
5272     rstartsj[0] = 0;
5273     len         = 0; /* total length of j or a array to be sent */
5274     k           = 0;
5275     for (i=0; i<nsends; i++) {
5276       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5277       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5278       for (j=0; j<nrows; j++) {
5279         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5280         for (l=0; l<sbs; l++) {
5281           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5282 
5283           rowlen[j*sbs+l] = ncols;
5284 
5285           len += ncols;
5286           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5287         }
5288         k++;
5289       }
5290       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5291 
5292       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5293     }
5294     /* recvs and sends of i-array are completed */
5295     i = nrecvs;
5296     while (i--) {
5297       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5298     }
5299     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5300 
5301     /* allocate buffers for sending j and a arrays */
5302     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5303     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5304 
5305     /* create i-array of B_oth */
5306     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5307 
5308     b_othi[0] = 0;
5309     len       = 0; /* total length of j or a array to be received */
5310     k         = 0;
5311     for (i=0; i<nrecvs; i++) {
5312       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5313       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5314       for (j=0; j<nrows; j++) {
5315         b_othi[k+1] = b_othi[k] + rowlen[j];
5316         len        += rowlen[j]; k++;
5317       }
5318       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5319     }
5320 
5321     /* allocate space for j and a arrays of B_oth */
5322     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5323     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5324 
5325     /* j-array */
5326     /*---------*/
5327     /*  post receives of j-array */
5328     for (i=0; i<nrecvs; i++) {
5329       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5330       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5331     }
5332 
5333     /* pack the outgoing message j-array */
5334     k = 0;
5335     for (i=0; i<nsends; i++) {
5336       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5337       bufJ  = bufj+sstartsj[i];
5338       for (j=0; j<nrows; j++) {
5339         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5340         for (ll=0; ll<sbs; ll++) {
5341           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5342           for (l=0; l<ncols; l++) {
5343             *bufJ++ = cols[l];
5344           }
5345           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5346         }
5347       }
5348       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5349     }
5350 
5351     /* recvs and sends of j-array are completed */
5352     i = nrecvs;
5353     while (i--) {
5354       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5355     }
5356     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5357   } else if (scall == MAT_REUSE_MATRIX) {
5358     sstartsj = *startsj_s;
5359     rstartsj = *startsj_r;
5360     bufa     = *bufa_ptr;
5361     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5362     b_otha   = b_oth->a;
5363   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5364 
5365   /* a-array */
5366   /*---------*/
5367   /*  post receives of a-array */
5368   for (i=0; i<nrecvs; i++) {
5369     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5370     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5371   }
5372 
5373   /* pack the outgoing message a-array */
5374   k = 0;
5375   for (i=0; i<nsends; i++) {
5376     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5377     bufA  = bufa+sstartsj[i];
5378     for (j=0; j<nrows; j++) {
5379       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5380       for (ll=0; ll<sbs; ll++) {
5381         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5382         for (l=0; l<ncols; l++) {
5383           *bufA++ = vals[l];
5384         }
5385         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5386       }
5387     }
5388     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5389   }
5390   /* recvs and sends of a-array are completed */
5391   i = nrecvs;
5392   while (i--) {
5393     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5394   }
5395   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5396   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5397 
5398   if (scall == MAT_INITIAL_MATRIX) {
5399     /* put together the new matrix */
5400     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5401 
5402     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5403     /* Since these are PETSc arrays, change flags to free them as necessary. */
5404     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5405     b_oth->free_a  = PETSC_TRUE;
5406     b_oth->free_ij = PETSC_TRUE;
5407     b_oth->nonew   = 0;
5408 
5409     ierr = PetscFree(bufj);CHKERRQ(ierr);
5410     if (!startsj_s || !bufa_ptr) {
5411       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5412       ierr = PetscFree(bufa);CHKERRQ(ierr);
5413     } else {
5414       *startsj_s = sstartsj;
5415       *startsj_r = rstartsj;
5416       *bufa_ptr  = bufa;
5417     }
5418   }
5419   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5420   PetscFunctionReturn(0);
5421 }
5422 
5423 #undef __FUNCT__
5424 #define __FUNCT__ "MatGetCommunicationStructs"
5425 /*@C
5426   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5427 
5428   Not Collective
5429 
5430   Input Parameters:
5431 . A - The matrix in mpiaij format
5432 
5433   Output Parameter:
5434 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5435 . colmap - A map from global column index to local index into lvec
5436 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5437 
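  Example usage (a minimal sketch; assumes A is an assembled MPIAIJ matrix and that PETSC_USE_CTABLE
  is not defined, otherwise colmap must be declared as a PetscTable):
.vb
      Vec        lvec;
      PetscInt   *colmap;
      VecScatter scat;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
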
5438   Level: developer
5439 
5440 @*/
5441 #if defined(PETSC_USE_CTABLE)
5442 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5443 #else
5444 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5445 #endif
5446 {
5447   Mat_MPIAIJ *a;
5448 
5449   PetscFunctionBegin;
5450   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5451   PetscValidPointer(lvec, 2);
5452   PetscValidPointer(colmap, 3);
5453   PetscValidPointer(multScatter, 4);
5454   a = (Mat_MPIAIJ*) A->data;
5455   if (lvec) *lvec = a->lvec;
5456   if (colmap) *colmap = a->colmap;
5457   if (multScatter) *multScatter = a->Mvctx;
5458   PetscFunctionReturn(0);
5459 }
5460 
5461 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5462 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5463 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5464 
5465 #undef __FUNCT__
5466 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5467 /*
5468     Computes C = A*B as (B'*A')' since computing the MPIDense*MPIAIJ product A*B directly is untenable
5469 
5470                n                       p                          p
5471         (              )       (              )         (                  )
5472       m (      A       )  *  n (       B      )   =   m (         C        )
5473         (              )       (              )         (                  )
5474 
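    i.e. C = A*B with A of type MPIDense (m x n) and B of type MPIAIJ (n x p); the product is
    formed as C = (B^T * A^T)^T so that the existing MPIAIJ * MPIDense multiplication kernel can be used.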
5475 */
5476 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5477 {
5478   PetscErrorCode ierr;
5479   Mat            At,Bt,Ct;
5480 
5481   PetscFunctionBegin;
5482   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5483   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5484   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5485   ierr = MatDestroy(&At);CHKERRQ(ierr);
5486   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5487   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5488   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5489   PetscFunctionReturn(0);
5490 }
5491 
5492 #undef __FUNCT__
5493 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5494 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5495 {
5496   PetscErrorCode ierr;
5497   PetscInt       m=A->rmap->n,n=B->cmap->n;
5498   Mat            Cmat;
5499 
5500   PetscFunctionBegin;
5501   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5502   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5503   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5504   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5505   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5506   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5507   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5508   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5509 
5510   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5511 
5512   *C = Cmat;
5513   PetscFunctionReturn(0);
5514 }
5515 
5516 /* ----------------------------------------------------------------*/
5517 #undef __FUNCT__
5518 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5519 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5520 {
5521   PetscErrorCode ierr;
5522 
5523   PetscFunctionBegin;
5524   if (scall == MAT_INITIAL_MATRIX) {
5525     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5526     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5527     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5528   }
5529   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5530   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5531   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5532   PetscFunctionReturn(0);
5533 }
5534 
5535 #if defined(PETSC_HAVE_MUMPS)
5536 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5537 #endif
5538 #if defined(PETSC_HAVE_PASTIX)
5539 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5540 #endif
5541 #if defined(PETSC_HAVE_SUPERLU_DIST)
5542 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5543 #endif
5544 #if defined(PETSC_HAVE_CLIQUE)
5545 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5546 #endif
5547 
5548 /*MC
5549    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5550 
5551    Options Database Keys:
5552 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5553 
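   Example usage (a minimal sketch; m,n,M,N and the preallocation values d_nz,o_nz are placeholders):
.vb
      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
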
5554   Level: beginner
5555 
5556 .seealso: MatCreateAIJ()
5557 M*/
5558 
5559 #undef __FUNCT__
5560 #define __FUNCT__ "MatCreate_MPIAIJ"
5561 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5562 {
5563   Mat_MPIAIJ     *b;
5564   PetscErrorCode ierr;
5565   PetscMPIInt    size;
5566 
5567   PetscFunctionBegin;
5568   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5569 
5570   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5571   B->data       = (void*)b;
5572   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5573   B->assembled  = PETSC_FALSE;
5574   B->insertmode = NOT_SET_VALUES;
5575   b->size       = size;
5576 
5577   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5578 
5579   /* build cache for off array entries formed */
5580   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5581 
5582   b->donotstash  = PETSC_FALSE;
5583   b->colmap      = 0;
5584   b->garray      = 0;
5585   b->roworiented = PETSC_TRUE;
5586 
5587   /* stuff used for matrix vector multiply */
5588   b->lvec  = NULL;
5589   b->Mvctx = NULL;
5590 
5591   /* stuff for MatGetRow() */
5592   b->rowindices   = 0;
5593   b->rowvalues    = 0;
5594   b->getrowactive = PETSC_FALSE;
5595 
5596   /* flexible pointer used in CUSP/CUSPARSE classes */
5597   b->spptr = NULL;
5598 
5599 #if defined(PETSC_HAVE_MUMPS)
5600   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5601 #endif
5602 #if defined(PETSC_HAVE_PASTIX)
5603   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5604 #endif
5605 #if defined(PETSC_HAVE_SUPERLU_DIST)
5606   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5607 #endif
5608 #if defined(PETSC_HAVE_CLIQUE)
5609   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5610 #endif
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5612   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5613   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5615   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5616   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5618   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5619   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5621   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5625   PetscFunctionReturn(0);
5626 }
5627 
5628 #undef __FUNCT__
5629 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5630 /*@
5631      MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
5632          and "off-diagonal" part of the matrix in CSR format.
5633 
5634    Collective on MPI_Comm
5635 
5636    Input Parameters:
5637 +  comm - MPI communicator
5638 .  m - number of local rows (Cannot be PETSC_DECIDE)
5639 .  n - This value should be the same as the local size used in creating the
5640        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
5641        it calculated if N is given). For square matrices n is almost always m.
5642 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5643 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5644 .   i - row indices for "diagonal" portion of matrix
5645 .   j - column indices
5646 .   a - matrix values
5647 .   oi - row indices for "off-diagonal" portion of matrix
5648 .   oj - column indices
5649 -   oa - matrix values
5650 
5651    Output Parameter:
5652 .   mat - the matrix
5653 
5654    Level: advanced
5655 
5656    Notes:
5657        The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5658        must free the arrays once the matrix has been destroyed, and not before.
5659 
5660        The i and j indices are 0 based
5661 
5662        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5663 
5664        This sets local rows and cannot be used to set off-processor values.
5665 
5666        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5667        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5668        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5669        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5670        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5671        communication if it is known that only local entries will be set.
5672 
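   Example usage (a minimal sketch; the arrays i,j,a and oi,oj,oa are assumed to already hold the local
   "diagonal" and "off-diagonal" blocks in 0-based CSR format, as described above):
.vb
      Mat A;
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ...  the arrays must remain valid until after the matrix has been destroyed ...
.ve
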
5673 .keywords: matrix, aij, compressed row, sparse, parallel
5674 
5675 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5676           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5677 @*/
5678 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5679 {
5680   PetscErrorCode ierr;
5681   Mat_MPIAIJ     *maij;
5682 
5683   PetscFunctionBegin;
5684   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
5685   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5686   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5687   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5688   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5689   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5690   maij = (Mat_MPIAIJ*) (*mat)->data;
5691 
5692   (*mat)->preallocated = PETSC_TRUE;
5693 
5694   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5695   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5696 
5697   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5698   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5699 
5700   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5701   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5702   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5703   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5704 
5705   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5706   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5707   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5708   PetscFunctionReturn(0);
5709 }
5710 
5711 /*
5712     Special version for direct calls from Fortran
5713 */
5714 #include <petsc-private/fortranimpl.h>
5715 
5716 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5717 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5718 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5719 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5720 #endif
5721 
5722 /* Change these macros so that they can be used in a void function (which cannot return an error code) */
5723 #undef CHKERRQ
5724 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5725 #undef SETERRQ2
5726 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5727 #undef SETERRQ3
5728 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5729 #undef SETERRQ
5730 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5731 
5732 #undef __FUNCT__
5733 #define __FUNCT__ "matsetvaluesmpiaij_"
5734 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5735 {
5736   Mat            mat  = *mmat;
5737   PetscInt       m    = *mm, n = *mn;
5738   InsertMode     addv = *maddv;
5739   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5740   PetscScalar    value;
5741   PetscErrorCode ierr;
5742 
5743   MatCheckPreallocated(mat,1);
5744   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5745 
5746 #if defined(PETSC_USE_DEBUG)
5747   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5748 #endif
5749   {
5750     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5751     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5752     PetscBool roworiented = aij->roworiented;
5753 
5754     /* Some Variables required in the macro */
5755     Mat        A                 = aij->A;
5756     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5757     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5758     MatScalar  *aa               = a->a;
5759     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5760     Mat        B                 = aij->B;
5761     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5762     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5763     MatScalar  *ba               = b->a;
5764 
5765     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5766     PetscInt  nonew = a->nonew;
5767     MatScalar *ap1,*ap2;
5768 
5769     PetscFunctionBegin;
5770     for (i=0; i<m; i++) {
5771       if (im[i] < 0) continue;
5772 #if defined(PETSC_USE_DEBUG)
5773       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5774 #endif
5775       if (im[i] >= rstart && im[i] < rend) {
5776         row      = im[i] - rstart;
5777         lastcol1 = -1;
5778         rp1      = aj + ai[row];
5779         ap1      = aa + ai[row];
5780         rmax1    = aimax[row];
5781         nrow1    = ailen[row];
5782         low1     = 0;
5783         high1    = nrow1;
5784         lastcol2 = -1;
5785         rp2      = bj + bi[row];
5786         ap2      = ba + bi[row];
5787         rmax2    = bimax[row];
5788         nrow2    = bilen[row];
5789         low2     = 0;
5790         high2    = nrow2;
5791 
5792         for (j=0; j<n; j++) {
5793           if (roworiented) value = v[i*n+j];
5794           else value = v[i+j*m];
5795           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5796           if (in[j] >= cstart && in[j] < cend) {
5797             col = in[j] - cstart;
5798             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5799           } else if (in[j] < 0) continue;
5800 #if defined(PETSC_USE_DEBUG)
5801           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5802 #endif
5803           else {
5804             if (mat->was_assembled) {
5805               if (!aij->colmap) {
5806                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5807               }
5808 #if defined(PETSC_USE_CTABLE)
5809               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5810               col--;
5811 #else
5812               col = aij->colmap[in[j]] - 1;
5813 #endif
5814               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5815                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5816                 col  =  in[j];
5817                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5818                 B     = aij->B;
5819                 b     = (Mat_SeqAIJ*)B->data;
5820                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5821                 rp2   = bj + bi[row];
5822                 ap2   = ba + bi[row];
5823                 rmax2 = bimax[row];
5824                 nrow2 = bilen[row];
5825                 low2  = 0;
5826                 high2 = nrow2;
5827                 bm    = aij->B->rmap->n;
5828                 ba    = b->a;
5829               }
5830             } else col = in[j];
5831             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5832           }
5833         }
5834       } else if (!aij->donotstash) {
5835         if (roworiented) {
5836           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5837         } else {
5838           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5839         }
5840       }
5841     }
5842   }
5843   PetscFunctionReturnVoid();
5844 }
5845 
5846