xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision c2765ee2f7479f12e11b2c8d7d978b84d68018c1)
1 
2 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
3 #include <petsc-private/vecimpl.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 /*MC
8    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
9 
10    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
11    and MATMPIAIJ otherwise.  As a result, for single process communicators,
12   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
13   for communicators controlling multiple processes.  It is recommended that you call both of
14   the above preallocation routines for simplicity.
15 
16    Options Database Keys:
17 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
18 
19   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also automatically
20    switches over to use inodes when enough consecutive rows with identical nonzero structure exist.
21 
22   Level: beginner
23 
24 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
25 M*/
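/*
   Illustrative sketch (not part of the original source): creating an AIJ matrix
   and calling both preallocation routines, as the manual page above recommends.
   The per-row nonzero estimates (5 diagonal, 2 off-diagonal) are assumptions
   chosen only for this example.
*/
#if 0
static PetscErrorCode ExampleCreateAIJ(MPI_Comm comm,PetscInt M,PetscInt N,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*A,MATAIJ);CHKERRQ(ierr);
  /* only the variant matching the communicator size takes effect; calling both is safe */
  ierr = MatSeqAIJSetPreallocation(*A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(*A,5,NULL,2,NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif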
26 
27 /*MC
28    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
29 
30    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
31    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
32    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
33   for communicators controlling multiple processes.  It is recommended that you call both of
34   the above preallocation routines for simplicity.
35 
36    Options Database Keys:
37 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
38 
39   Level: beginner
40 
41 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
42 M*/
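/*
   Sketch (added for illustration): selecting the CRL format at runtime. A, comm,
   M, and N are assumed to be declared by the caller; with -mat_type aijcrl on the
   command line, MatSetFromOptions() performs the switch described above.
*/
#if 0
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);                 /* honors -mat_type aijcrl */
  ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
#endif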
43 
44 #undef __FUNCT__
45 #define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
46 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
47 {
48   PetscErrorCode  ierr;
49   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
50   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
51   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
52   const PetscInt  *ia,*ib;
53   const MatScalar *aa,*bb;
54   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
55   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
56 
57   PetscFunctionBegin;
58   *keptrows = NULL;
59   ia        = a->i;
60   ib        = b->i;
61   for (i=0; i<m; i++) {
62     na = ia[i+1] - ia[i];
63     nb = ib[i+1] - ib[i];
64     if (!na && !nb) {
65       cnt++;
66       goto ok1;
67     }
68     aa = a->a + ia[i];
69     for (j=0; j<na; j++) {
70       if (aa[j] != 0.0) goto ok1;
71     }
72     bb = b->a + ib[i];
73     for (j=0; j <nb; j++) {
74       if (bb[j] != 0.0) goto ok1;
75     }
76     cnt++;
77 ok1:;
78   }
79   ierr = MPI_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPIU_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
80   if (!n0rows) PetscFunctionReturn(0);
81   ierr = PetscMalloc1((M->rmap->n-cnt),&rows);CHKERRQ(ierr);
82   cnt  = 0;
83   for (i=0; i<m; i++) {
84     na = ia[i+1] - ia[i];
85     nb = ib[i+1] - ib[i];
86     if (!na && !nb) continue;
87     aa = a->a + ia[i];
88     for (j=0; j<na;j++) {
89       if (aa[j] != 0.0) {
90         rows[cnt++] = rstart + i;
91         goto ok2;
92       }
93     }
94     bb = b->a + ib[i];
95     for (j=0; j<nb; j++) {
96       if (bb[j] != 0.0) {
97         rows[cnt++] = rstart + i;
98         goto ok2;
99       }
100     }
101 ok2:;
102   }
103   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
104   PetscFunctionReturn(0);
105 }
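/*
   Usage sketch (illustrative, not part of the original source): the routine above
   is reached through the public MatFindNonzeroRows(); A is assumed to be an
   assembled MPIAIJ matrix.
*/
#if 0
  IS       keptrows;
  PetscInt nkept;
  ierr = MatFindNonzeroRows(A,&keptrows);CHKERRQ(ierr);
  if (keptrows) {                  /* NULL signals that every row has a nonzero */
    ierr = ISGetLocalSize(keptrows,&nkept);CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"kept %D of %D local rows\n",nkept,A->rmap->n);CHKERRQ(ierr);
    ierr = ISDestroy(&keptrows);CHKERRQ(ierr);
  }
#endif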
106 
107 #undef __FUNCT__
108 #define __FUNCT__ "MatDiagonalSet_MPIAIJ"
109 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
110 {
111   PetscErrorCode    ierr;
112   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
113 
114   PetscFunctionBegin;
115   if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
116     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
117   } else {
118     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
119   }
120   PetscFunctionReturn(0);
121 }
122 
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ"
126 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
127 {
128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
129   PetscErrorCode ierr;
130   PetscInt       i,rstart,nrows,*rows;
131 
132   PetscFunctionBegin;
133   *zrows = NULL;
134   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
135   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
136   for (i=0; i<nrows; i++) rows[i] += rstart;
137   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ"
143 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
144 {
145   PetscErrorCode ierr;
146   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
147   PetscInt       i,n,*garray = aij->garray;
148   Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
149   Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
150   PetscReal      *work;
151 
152   PetscFunctionBegin;
153   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
154   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
155   if (type == NORM_2) {
156     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
157       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
158     }
159     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
160       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
161     }
162   } else if (type == NORM_1) {
163     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
164       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
165     }
166     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
167       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
168     }
169   } else if (type == NORM_INFINITY) {
170     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
171       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
172     }
173     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
174       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
175     }
176 
177   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
178   if (type == NORM_INFINITY) {
179     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
180   } else {
181     ierr = MPI_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
182   }
183   ierr = PetscFree(work);CHKERRQ(ierr);
184   if (type == NORM_2) {
185     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
186   }
187   PetscFunctionReturn(0);
188 }
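/*
   Usage sketch (illustrative): the public entry point is MatGetColumnNorms().
   Note that, matching the Allreduce above, norms must have room for all N
   global columns on every process.
*/
#if 0
  PetscReal *norms;
  PetscInt  N;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
  ierr = PetscFree(norms);CHKERRQ(ierr);
#endif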
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatDistribute_MPIAIJ"
192 /*
193     Distributes a SeqAIJ matrix across a set of processes. Code stolen from
194     MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.
195 
196     Only for square matrices
197 
198     Used by a preconditioner, hence PETSC_EXTERN
199 */
200 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
201 {
202   PetscMPIInt    rank,size;
203   PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
204   PetscErrorCode ierr;
205   Mat            mat;
206   Mat_SeqAIJ     *gmata;
207   PetscMPIInt    tag;
208   MPI_Status     status;
209   PetscBool      aij;
210   MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
214   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
215   if (!rank) {
216     ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
217     if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s",((PetscObject)gmat)->type_name);
218   }
219   if (reuse == MAT_INITIAL_MATRIX) {
220     ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
221     ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
222     ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
223     ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
224     ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
225     ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
226     ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
227     ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
228     ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
229 
230     rowners[0] = 0;
231     for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
232     rstart = rowners[rank];
233     rend   = rowners[rank+1];
234     ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
235     if (!rank) {
236       gmata = (Mat_SeqAIJ*) gmat->data;
237       /* send row lengths to all processors */
238       for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
239       for (i=1; i<size; i++) {
240         ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
241       }
242       /* determine the number of diagonal and off-diagonal nonzeros in each row */
243       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
244       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
245       jj   = 0;
246       for (i=0; i<m; i++) {
247         for (j=0; j<dlens[i]; j++) {
248           if (gmata->j[jj] < rstart) ld[i]++;
249           if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
250           jj++;
251         }
252       }
253       /* send column indices to other processes */
254       for (i=1; i<size; i++) {
255         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
256         ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
257         ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
258       }
259 
260       /* send numerical values to other processes */
261       for (i=1; i<size; i++) {
262         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
263         ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
264       }
265       gmataa = gmata->a;
266       gmataj = gmata->j;
267 
268     } else {
269       /* receive row lengths */
270       ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
271       /* receive column indices */
272       ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
273       ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
274       ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
275       /* determine the number of diagonal and off-diagonal nonzeros in each row */
276       ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
277       ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
278       jj   = 0;
279       for (i=0; i<m; i++) {
280         for (j=0; j<dlens[i]; j++) {
281           if (gmataj[jj] < rstart) ld[i]++;
282           if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
283           jj++;
284         }
285       }
286       /* receive numerical values */
287       ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
288       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
289     }
290     /* set preallocation */
291     for (i=0; i<m; i++) {
292       dlens[i] -= olens[i];
293     }
294     ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
295     ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);
296 
297     for (i=0; i<m; i++) {
298       dlens[i] += olens[i];
299     }
300     cnt = 0;
301     for (i=0; i<m; i++) {
302       row  = rstart + i;
303       ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
304       cnt += dlens[i];
305     }
306     if (rank) {
307       ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
308     }
309     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
310     ierr = PetscFree(rowners);CHKERRQ(ierr);
311 
312     ((Mat_MPIAIJ*)(mat->data))->ld = ld;
313 
314     *inmat = mat;
315   } else {   /* column indices are already set; only need to move over numerical values from process 0 */
316     Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
317     Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
318     mat  = *inmat;
319     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
320     if (!rank) {
321       /* send numerical values to other processes */
322       gmata  = (Mat_SeqAIJ*) gmat->data;
323       ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
324       gmataa = gmata->a;
325       for (i=1; i<size; i++) {
326         nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
327         ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
328       }
329       nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
330     } else {
331       /* receive numerical values from process 0*/
332       nz   = Ad->nz + Ao->nz;
333       ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
334       ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
335     }
336     /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
337     ld = ((Mat_MPIAIJ*)(mat->data))->ld;
338     ad = Ad->a;
339     ao = Ao->a;
340     if (mat->rmap->n) {
341       i  = 0;
342       nz = ld[i];                                   ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
343       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
344     }
345     for (i=1; i<mat->rmap->n; i++) {
346       nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
347       nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
348     }
349     i--;
350     if (mat->rmap->n) {
351       nz = Ao->i[i+1] - Ao->i[i] - ld[i];           ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
352     }
353     if (rank) {
354       ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
355     }
356   }
357   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
358   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
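/*
   Call sketch (illustrative): distributing a SeqAIJ matrix gmat held on rank 0.
   The local row count m must be concrete on every process; computing it with
   PetscSplitOwnership() is an assumption of this example, any consistent split
   works.
*/
#if 0
  Mat      dmat;
  PetscInt m = PETSC_DECIDE,M = 100;   /* M: global row count of gmat, known on all ranks */
  ierr = PetscSplitOwnership(comm,&m,&M);CHKERRQ(ierr);
  ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dmat);CHKERRQ(ierr);
#endif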
361 
362 /*
363   Local utility routine that creates a mapping from the global column
364 number to the local number in the off-diagonal part of the local
365 storage of the matrix.  When PETSC_USE_CTABLE is defined this is scalable, at
366 a slightly higher hash table cost; without it, it is not scalable (each process
367 allocates an order-N integer array) but access is fast.
368 */
369 #undef __FUNCT__
370 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private"
371 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
372 {
373   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
374   PetscErrorCode ierr;
375   PetscInt       n = aij->B->cmap->n,i;
376 
377   PetscFunctionBegin;
378   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
379 #if defined(PETSC_USE_CTABLE)
380   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
381   for (i=0; i<n; i++) {
382     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
383   }
384 #else
385   ierr = PetscCalloc1((mat->cmap->N+1),&aij->colmap);CHKERRQ(ierr);
386   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
387   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
388 #endif
389   PetscFunctionReturn(0);
390 }
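/*
   Lookup sketch (illustrative): both colmap representations store the local
   index plus one so that zero can mean "not present"; a global column gcol (a
   name assumed for this example) is translated back to a local index like so.
*/
#if 0
  PetscInt lcol;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
  lcol--;                          /* now the local column, or -1 if absent */
#else
  lcol = aij->colmap[gcol] - 1;    /* now the local column, or -1 if absent */
#endif
#endif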
391 
392 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv) \
393 { \
394     if (col <= lastcol1)  low1 = 0;     \
395     else                 high1 = nrow1; \
396     lastcol1 = col;\
397     while (high1-low1 > 5) { \
398       t = (low1+high1)/2; \
399       if (rp1[t] > col) high1 = t; \
400       else              low1  = t; \
401     } \
402       for (_i=low1; _i<high1; _i++) { \
403         if (rp1[_i] > col) break; \
404         if (rp1[_i] == col) { \
405           if (addv == ADD_VALUES) ap1[_i] += value;   \
406           else                    ap1[_i] = value; \
407           goto a_noinsert; \
408         } \
409       }  \
410       if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
411       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
412       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
413       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
414       N = nrow1++ - 1; a->nz++; high1++; \
415       /* shift up all the later entries in this row */ \
416       for (ii=N; ii>=_i; ii--) { \
417         rp1[ii+1] = rp1[ii]; \
418         ap1[ii+1] = ap1[ii]; \
419       } \
420       rp1[_i] = col;  \
421       ap1[_i] = value;  \
422       A->nonzerostate++;\
423       a_noinsert: ; \
424       ailen[row] = nrow1; \
425 }
426 
427 
428 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv) \
429   { \
430     if (col <= lastcol2) low2 = 0;                        \
431     else high2 = nrow2;                                   \
432     lastcol2 = col;                                       \
433     while (high2-low2 > 5) {                              \
434       t = (low2+high2)/2;                                 \
435       if (rp2[t] > col) high2 = t;                        \
436       else             low2  = t;                         \
437     }                                                     \
438     for (_i=low2; _i<high2; _i++) {                       \
439       if (rp2[_i] > col) break;                           \
440       if (rp2[_i] == col) {                               \
441         if (addv == ADD_VALUES) ap2[_i] += value;         \
442         else                    ap2[_i] = value;          \
443         goto b_noinsert;                                  \
444       }                                                   \
445     }                                                     \
446     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
447     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
448     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
449     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
450     N = nrow2++ - 1; b->nz++; high2++;                    \
451     /* shift up all the later entries in this row */      \
452     for (ii=N; ii>=_i; ii--) {                            \
453       rp2[ii+1] = rp2[ii];                                \
454       ap2[ii+1] = ap2[ii];                                \
455     }                                                     \
456     rp2[_i] = col;                                        \
457     ap2[_i] = value;                                      \
458     B->nonzerostate++;                                    \
459     b_noinsert: ;                                         \
460     bilen[row] = nrow2;                                   \
461   }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesRow_MPIAIJ"
465 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
466 {
467   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
468   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
469   PetscErrorCode ierr;
470   PetscInt       l,*garray = mat->garray,diag;
471 
472   PetscFunctionBegin;
473   /* code only works for square matrices A */
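  /* note (added): v must contain the entire local row in ascending global-column
     order: off-diagonal entries left of the diagonal block, then the diagonal
     block, then the off-diagonal entries to its right, matching the three
     PetscMemcpy() calls below */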
474 
475   /* find size of row to the left of the diagonal part */
476   ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
477   row  = row - diag;
478   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
479     if (garray[b->j[b->i[row]+l]] > diag) break;
480   }
481   ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);
482 
483   /* diagonal part */
484   ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);
485 
486   /* right of diagonal part */
487   ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
488   PetscFunctionReturn(0);
489 }
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatSetValues_MPIAIJ"
493 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
494 {
495   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
496   PetscScalar    value;
497   PetscErrorCode ierr;
498   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
499   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
500   PetscBool      roworiented = aij->roworiented;
501 
502   /* Some Variables required in the macro */
503   Mat        A                 = aij->A;
504   Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
505   PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
506   MatScalar  *aa               = a->a;
507   PetscBool  ignorezeroentries = a->ignorezeroentries;
508   Mat        B                 = aij->B;
509   Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
510   PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
511   MatScalar  *ba               = b->a;
512 
513   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
514   PetscInt  nonew;
515   MatScalar *ap1,*ap2;
516 
517   PetscFunctionBegin;
518   for (i=0; i<m; i++) {
519     if (im[i] < 0) continue;
520 #if defined(PETSC_USE_DEBUG)
521     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
522 #endif
523     if (im[i] >= rstart && im[i] < rend) {
524       row      = im[i] - rstart;
525       lastcol1 = -1;
526       rp1      = aj + ai[row];
527       ap1      = aa + ai[row];
528       rmax1    = aimax[row];
529       nrow1    = ailen[row];
530       low1     = 0;
531       high1    = nrow1;
532       lastcol2 = -1;
533       rp2      = bj + bi[row];
534       ap2      = ba + bi[row];
535       rmax2    = bimax[row];
536       nrow2    = bilen[row];
537       low2     = 0;
538       high2    = nrow2;
539 
540       for (j=0; j<n; j++) {
541         if (roworiented) value = v[i*n+j];
542         else             value = v[i+j*m];
543         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
544         if (in[j] >= cstart && in[j] < cend) {
545           col   = in[j] - cstart;
546           nonew = a->nonew;
547           MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
548         } else if (in[j] < 0) continue;
549 #if defined(PETSC_USE_DEBUG)
550         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
551 #endif
552         else {
553           if (mat->was_assembled) {
554             if (!aij->colmap) {
555               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
556             }
557 #if defined(PETSC_USE_CTABLE)
558             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
559             col--;
560 #else
561             col = aij->colmap[in[j]] - 1;
562 #endif
563             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
564               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
565               col  =  in[j];
566               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
567               B     = aij->B;
568               b     = (Mat_SeqAIJ*)B->data;
569               bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
570               rp2   = bj + bi[row];
571               ap2   = ba + bi[row];
572               rmax2 = bimax[row];
573               nrow2 = bilen[row];
574               low2  = 0;
575               high2 = nrow2;
576               bm    = aij->B->rmap->n;
577               ba    = b->a;
578             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
579           } else col = in[j];
580           nonew = b->nonew;
581           MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
582         }
583       }
584     } else {
585       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
586       if (!aij->donotstash) {
587         mat->assembled = PETSC_FALSE;
588         if (roworiented) {
589           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
590         } else {
591           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
592         }
593       }
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatGetValues_MPIAIJ"
601 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
602 {
603   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
604   PetscErrorCode ierr;
605   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
606   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
607 
608   PetscFunctionBegin;
609   for (i=0; i<m; i++) {
610     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
611     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
612     if (idxm[i] >= rstart && idxm[i] < rend) {
613       row = idxm[i] - rstart;
614       for (j=0; j<n; j++) {
615         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
616         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
617         if (idxn[j] >= cstart && idxn[j] < cend) {
618           col  = idxn[j] - cstart;
619           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
620         } else {
621           if (!aij->colmap) {
622             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
623           }
624 #if defined(PETSC_USE_CTABLE)
625           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
626           col--;
627 #else
628           col = aij->colmap[idxn[j]] - 1;
629 #endif
630           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
631           else {
632             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
633           }
634         }
635       }
636     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
637   }
638   PetscFunctionReturn(0);
639 }
640 
641 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ"
645 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
646 {
647   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
648   PetscErrorCode ierr;
649   PetscInt       nstash,reallocs;
650   InsertMode     addv;
651 
652   PetscFunctionBegin;
653   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
654 
655   /* make sure all processors are either in INSERTMODE or ADDMODE */
656   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
657   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
658   mat->insertmode = addv; /* in case this processor had no cache */
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
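/*
   Assembly sketch (illustrative): values set with MatSetValues() for rows owned
   by other processes are stashed locally and only communicated by the
   begin/end pair above, so a typical assembly loop looks like this (i, ncols,
   cols, and vals are assumed to be provided by the application).
*/
#if 0
  for (i=rstart; i<rend; i++) {
    ierr = MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif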
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ"
668 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
669 {
670   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
671   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
672   PetscErrorCode ierr;
673   PetscMPIInt    n;
674   PetscInt       i,j,rstart,ncols,flg;
675   PetscInt       *row,*col;
676   PetscBool      other_disassembled;
677   PetscScalar    *val;
678   InsertMode     addv = mat->insertmode;
679 
680   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
681 
682   PetscFunctionBegin;
683   if (!aij->donotstash && !mat->nooffprocentries) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       if (!flg) break;
687 
688       for (i=0; i<n; ) {
689         /* Now identify the consecutive vals belonging to the same row */
690         for (j=i,rstart=row[j]; j<n; j++) {
691           if (row[j] != rstart) break;
692         }
693         if (j < n) ncols = j-i;
694         else       ncols = n-i;
695         /* Now assemble all these values with a single function call */
696         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
697 
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702   }
703   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
704   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
705 
706   /* determine if any processor has disassembled, if so we must
707      also disassemble ourselves, in order that we may reassemble. */
708   /*
709      if nonzero structure of submatrix B cannot change then we know that
710      no processor disassembled thus we can skip this stuff
711   */
712   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
713     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
714     if (mat->was_assembled && !other_disassembled) {
715       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
716     }
717   }
718   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
719     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
720   }
721   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
722   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
723   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
724 
725   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
726 
727   aij->rowvalues = 0;
728 
729   /* used by MatAXPY() */
730   a->xtoy = 0; ((Mat_SeqAIJ*)aij->B->data)->xtoy = 0;   /* b->xtoy = 0 */
731   a->XtoY = 0; ((Mat_SeqAIJ*)aij->B->data)->XtoY = 0;   /* b->XtoY = 0 */
732 
733   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
734   if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;
735 
736   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
737   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
738     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
739     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
740   }
741   PetscFunctionReturn(0);
742 }
743 
744 #undef __FUNCT__
745 #define __FUNCT__ "MatZeroEntries_MPIAIJ"
746 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
747 {
748   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
749   PetscErrorCode ierr;
750 
751   PetscFunctionBegin;
752   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
753   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatZeroRows_MPIAIJ"
759 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
760 {
761   Mat_MPIAIJ    *mat    = (Mat_MPIAIJ *) A->data;
762   PetscInt      *owners = A->rmap->range;
763   PetscInt       n      = A->rmap->n;
764   PetscMPIInt    size   = mat->size;
765   PetscSF        sf;
766   PetscInt      *lrows;
767   PetscSFNode   *rrows;
768   PetscInt       lastidx = -1, r, p = 0, len = 0;
769   PetscErrorCode ierr;
770 
771   PetscFunctionBegin;
772   /* Create SF where leaves are input rows and roots are owned rows */
773   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
774   for (r = 0; r < n; ++r) lrows[r] = -1;
775   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
776   for (r = 0; r < N; ++r) {
777     const PetscInt idx   = rows[r];
778     PetscBool      found = PETSC_FALSE;
779     /* Trick for efficient searching for sorted rows */
780     if (lastidx > idx) p = 0;
781     lastidx = idx;
782     for (; p < size; ++p) {
783       if (idx >= owners[p] && idx < owners[p+1]) {
784         rrows[r].rank  = p;
785         rrows[r].index = rows[r] - owners[p];
786         found = PETSC_TRUE;
787         break;
788       }
789     }
790     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
791   }
792   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
793   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
794   /* Collect flags for rows to be zeroed */
795   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
796   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
797   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
798   /* Compress and put in row numbers */
799   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
800   /* fix right hand side if needed */
801   if (x && b) {
802     const PetscScalar *xx;
803     PetscScalar       *bb;
804 
805     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
806     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
807     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
808     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
809     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
810   }
811   /* Must zero mat->B before mat->A because the (diag) case below may put values into mat->B */
812   ierr = MatZeroRows(mat->B, len, lrows, 0.0, 0,0);CHKERRQ(ierr);
813   if ((diag != 0.0) && (mat->A->rmap->N == mat->A->cmap->N)) {
814     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
815   } else if (diag != 0.0) {
816     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
817     if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
818     for (r = 0; r < len; ++r) {
819       const PetscInt row = lrows[r] + A->rmap->rstart;
820       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
821     }
822     ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824   } else {
825     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828 
829   /* only change matrix nonzero state if pattern was allowed to be changed */
830   if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
831     PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
832     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
833   }
834   PetscFunctionReturn(0);
835 }
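/*
   Usage sketch (illustrative): zeroing rows through the public MatZeroRows();
   passing x and b makes the routine above fix the right hand side so that
   b_i = diag*x_i for each zeroed global row i. The row numbers are example
   values.
*/
#if 0
  PetscInt bdrows[] = {0,1};
  ierr = MatZeroRows(A,2,bdrows,1.0,x,b);CHKERRQ(ierr);
#endif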
836 
837 #undef __FUNCT__
838 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ"
839 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
840 {
841   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
842   PetscErrorCode    ierr;
843   PetscMPIInt       size = l->size;
844   PetscInt          i,j,r,m,n = A->rmap->n,p = 0,len = 0,lastidx = -1;
845   PetscInt          *lrows,*owners = A->rmap->range;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
857 
858   PetscFunctionBegin;
859   /* Create SF where leaves are input rows and roots are owned rows */
860   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
861   for (r = 0; r < n; ++r) lrows[r] = -1;
862   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
863   for (r = 0; r < N; ++r) {
864     const PetscInt idx   = rows[r];
865     PetscBool      found = PETSC_FALSE;
866     /* Trick for efficient searching for sorted rows */
867     if (lastidx > idx) p = 0;
868     lastidx = idx;
869     for (; p < size; ++p) {
870       if (idx >= owners[p] && idx < owners[p+1]) {
871         rrows[r].rank  = p;
872         rrows[r].index = rows[r] - owners[p];
873         found = PETSC_TRUE;
874         break;
875       }
876     }
877     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
878   }
879   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
880   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
881   /* Collect flags for rows to be zeroed */
882   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
883   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
884   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
885   /* Compress and put in row numbers */
886   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
887   /* zero diagonal part of matrix */
888   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
889   /* handle off diagonal part of matrix */
890   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
891   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
892   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
893   for (i=0; i<len; i++) bb[lrows[i]] = 1;
894   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
895   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
896   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
897   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
898   if (x) {
899     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
900     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
901     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
902     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
903   }
904   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
905   /* remove zeroed rows of off diagonal matrix */
906   ii = aij->i;
907   for (i=0; i<len; i++) {
908     ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
909   }
910   /* loop over all elements of off process part of matrix zeroing removed columns*/
911   if (aij->compressedrow.use) {
912     m    = aij->compressedrow.nrows;
913     ii   = aij->compressedrow.i;
914     ridx = aij->compressedrow.rindex;
915     for (i=0; i<m; i++) {
916       n  = ii[i+1] - ii[i];
917       aj = aij->j + ii[i];
918       aa = aij->a + ii[i];
919 
920       for (j=0; j<n; j++) {
921         if (PetscAbsScalar(mask[*aj])) {
922           if (b) bb[*ridx] -= *aa*xx[*aj];
923           *aa = 0.0;
924         }
925         aa++;
926         aj++;
927       }
928       ridx++;
929     }
930   } else { /* do not use compressed row format */
931     m = l->B->rmap->n;
932     for (i=0; i<m; i++) {
933       n  = ii[i+1] - ii[i];
934       aj = aij->j + ii[i];
935       aa = aij->a + ii[i];
936       for (j=0; j<n; j++) {
937         if (PetscAbsScalar(mask[*aj])) {
938           if (b) bb[i] -= *aa*xx[*aj];
939           *aa = 0.0;
940         }
941         aa++;
942         aj++;
943       }
944     }
945   }
946   if (x) {
947     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
948     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
949   }
950   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
951   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
952   ierr = PetscFree(lrows);CHKERRQ(ierr);
953 
954   /* only change matrix nonzero state if pattern was allowed to be changed */
955   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
956     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
957     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
958   }
959   PetscFunctionReturn(0);
960 }
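/*
   Usage sketch (illustrative): MatZeroRowsColumns() also zeroes the columns,
   which keeps a symmetric matrix symmetric; the eliminated unknowns are moved
   into the right hand side via x, as the off-diagonal loop above shows. The
   row numbers are example values.
*/
#if 0
  PetscInt bdrows[] = {0,1};
  ierr = MatZeroRowsColumns(A,2,bdrows,1.0,x,b);CHKERRQ(ierr);
#endif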
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMult_MPIAIJ"
964 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
965 {
966   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
967   PetscErrorCode ierr;
968   PetscInt       nt;
969 
970   PetscFunctionBegin;
971   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
972   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
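  /* note (added): y = A_diag*x_local + B_off*x_ghost; the ghost-value scatter is
     started first so that it overlaps the diagonal-block product below */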
973   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
974   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
975   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
976   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ"
982 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatMultAdd_MPIAIJ"
994 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
995 {
996   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1001   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1003   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMultTranspose_MPIAIJ"
1009 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1012   PetscErrorCode ierr;
1013   PetscBool      merged;
1014 
1015   PetscFunctionBegin;
1016   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1017   /* do nondiagonal part */
1018   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1019   if (!merged) {
1020     /* send it on its way */
1021     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1022     /* do local part */
1023     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1024     /* receive remote parts: note this assumes the values are not actually */
1025     /* added into yy until the next line */
1026     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1027   } else {
1028     /* do local part */
1029     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1030     /* send it on its way */
1031     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1032     /* values were actually received in the Begin() but we still need to call this no-op End() */
1033     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1034   }
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatIsTranspose_MPIAIJ"
1040 PetscErrorCode  MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1041 {
1042   MPI_Comm       comm;
1043   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1044   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1045   IS             Me,Notme;
1046   PetscErrorCode ierr;
1047   PetscInt       M,N,first,last,*notme,i;
1048   PetscMPIInt    size;
1049 
1050   PetscFunctionBegin;
1051   /* Easy test: symmetric diagonal block */
1052   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1053   ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
1054   if (!*f) PetscFunctionReturn(0);
1055   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1057   if (size == 1) PetscFunctionReturn(0);
1058 
1059   /* Hard test: off-diagonal block. This requires a MatGetSubMatrices() call. */
1060   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1061   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1062   ierr = PetscMalloc1((N-last+first),&notme);CHKERRQ(ierr);
1063   for (i=0; i<first; i++) notme[i] = i;
1064   for (i=last; i<M; i++) notme[i-last+first] = i;
1065   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1066   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1067   ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1068   Aoff = Aoffs[0];
1069   ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1070   Boff = Boffs[0];
1071   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1072   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1073   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1074   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1075   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1076   ierr = PetscFree(notme);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ"
1082 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1083 {
1084   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1085   PetscErrorCode ierr;
1086 
1087   PetscFunctionBegin;
1088   /* do nondiagonal part */
1089   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1090   /* send it on its way */
1091   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1092   /* do local part */
1093   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1094   /* receive remote parts */
1095   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPIAIJ"
1105 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1106 {
1107   PetscErrorCode ierr;
1108   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1109 
1110   PetscFunctionBegin;
1111   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1112   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1113   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatScale_MPIAIJ"
1119 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1120 {
1121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1126   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatDestroy_Redundant"
1132 PetscErrorCode MatDestroy_Redundant(Mat_Redundant **redundant)
1133 {
1134   PetscErrorCode ierr;
1135   Mat_Redundant  *redund = *redundant;
1136   PetscInt       i;
1137 
1138   PetscFunctionBegin;
1139   *redundant = NULL;
1140   if (redund){
1141     if (redund->matseq) { /* via MatGetSubMatrices()  */
1142       ierr = ISDestroy(&redund->isrow);CHKERRQ(ierr);
1143       ierr = ISDestroy(&redund->iscol);CHKERRQ(ierr);
1144       ierr = MatDestroy(&redund->matseq[0]);CHKERRQ(ierr);
1145       ierr = PetscFree(redund->matseq);CHKERRQ(ierr);
1146     } else {
1147       ierr = PetscFree2(redund->send_rank,redund->recv_rank);CHKERRQ(ierr);
1148       ierr = PetscFree(redund->sbuf_j);CHKERRQ(ierr);
1149       ierr = PetscFree(redund->sbuf_a);CHKERRQ(ierr);
1150       for (i=0; i<redund->nrecvs; i++) {
1151         ierr = PetscFree(redund->rbuf_j[i]);CHKERRQ(ierr);
1152         ierr = PetscFree(redund->rbuf_a[i]);CHKERRQ(ierr);
1153       }
1154       ierr = PetscFree4(redund->sbuf_nz,redund->rbuf_nz,redund->rbuf_j,redund->rbuf_a);CHKERRQ(ierr);
1155     }
1156 
1157     if (redund->psubcomm) {
1158       ierr = PetscSubcommDestroy(&redund->psubcomm);CHKERRQ(ierr);
1159     }
1160     ierr = PetscFree(redund);CHKERRQ(ierr);
1161   }
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatDestroy_MPIAIJ"
1167 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1168 {
1169   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1170   PetscErrorCode ierr;
1171 
1172   PetscFunctionBegin;
1173 #if defined(PETSC_USE_LOG)
1174   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1175 #endif
1176   ierr = MatDestroy_Redundant(&aij->redundant);CHKERRQ(ierr);
1177   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1178   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1179   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1180   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1181 #if defined(PETSC_USE_CTABLE)
1182   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1183 #else
1184   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1185 #endif
1186   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1187   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1188   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1189   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1190   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1191   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1192 
1193   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1194   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1195   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1196   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1197   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1198   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1199   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1200   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1201   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatView_MPIAIJ_Binary"
1207 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1208 {
1209   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1210   Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
1211   Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
1212   PetscErrorCode ierr;
1213   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1214   int            fd;
1215   PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
1216   PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
1217   PetscScalar    *column_values;
1218   PetscInt       message_count,flowcontrolcount;
1219   FILE           *file;
1220 
1221   PetscFunctionBegin;
1222   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1223   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1224   nz   = A->nz + B->nz;
1225   if (!rank) {
1226     header[0] = MAT_FILE_CLASSID;
1227     header[1] = mat->rmap->N;
1228     header[2] = mat->cmap->N;
1229 
1230     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1232     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1233     /* get largest number of rows any processor has */
1234     rlen  = mat->rmap->n;
1235     range = mat->rmap->range;
1236     for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
1237   } else {
1238     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1239     rlen = mat->rmap->n;
1240   }
1241 
1242   /* load up the local row counts */
1243   ierr = PetscMalloc1((rlen+1),&row_lengths);CHKERRQ(ierr);
1244   for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1245 
1246   /* store the row lengths to the file */
1247   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1248   if (!rank) {
1249     ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1250     for (i=1; i<size; i++) {
1251       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1252       rlen = range[i+1] - range[i];
1253       ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1254       ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1255     }
1256     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1257   } else {
1258     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1259     ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1260     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1261   }
1262   ierr = PetscFree(row_lengths);CHKERRQ(ierr);
1263 
1264   /* load up the local column indices */
1265   nzmax = nz; /* the root process needs buffer space for the largest nz on any process */
1266   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1267   ierr  = PetscMalloc1((nzmax+1),&column_indices);CHKERRQ(ierr);
1268   cnt   = 0;
1269   for (i=0; i<mat->rmap->n; i++) {
1270     for (j=B->i[i]; j<B->i[i+1]; j++) {
1271       if ((col = garray[B->j[j]]) > cstart) break;
1272       column_indices[cnt++] = col;
1273     }
1274     for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
1275     for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
1276   }
1277   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1278 
1279   /* store the column indices to the file */
1280   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1281   if (!rank) {
1282     MPI_Status status;
1283     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1284     for (i=1; i<size; i++) {
1285       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1286       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1287       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1288       ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1289       ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1290     }
1291     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1292   } else {
1293     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1294     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1295     ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1296     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1297   }
1298   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1299 
1300   /* load up the local column values */
1301   ierr = PetscMalloc1((nzmax+1),&column_values);CHKERRQ(ierr);
1302   cnt  = 0;
1303   for (i=0; i<mat->rmap->n; i++) {
1304     for (j=B->i[i]; j<B->i[i+1]; j++) {
1305       if (garray[B->j[j]] > cstart) break;
1306       column_values[cnt++] = B->a[j];
1307     }
1308     for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
1309     for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
1310   }
1311   if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);
1312 
1313   /* store the column values to the file */
1314   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1315   if (!rank) {
1316     MPI_Status status;
1317     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1318     for (i=1; i<size; i++) {
1319       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1320       ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1321       if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: rnz = %D nzmax = %D",rnz,nzmax);
1322       ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1323       ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1324     }
1325     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1326   } else {
1327     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1328     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1329     ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1330     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1331   }
1332   ierr = PetscFree(column_values);CHKERRQ(ierr);
1333 
1334   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1335   if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
1336   PetscFunctionReturn(0);
1337 }
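/*
   The binary writer above funnels all data through process 0 using a simple
   master/worker flow-control protocol, so only a bounded number of messages is
   in flight at once.  A minimal sketch of the pattern (error checking elided;
   "data" stands for any of the three payloads written above):

     ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);
     if (!rank) {
       ...write local data...
       for (i=1; i<size; i++) {
         ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);
         ...receive data from process i and write it...
       }
       ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);
     } else {
       ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);
       ...send local data to process 0...
       ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);
     }
*/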
1338 
1339 #include <petscdraw.h>
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket"
1342 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1343 {
1344   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1345   PetscErrorCode    ierr;
1346   PetscMPIInt       rank = aij->rank,size = aij->size;
1347   PetscBool         isdraw,iascii,isbinary;
1348   PetscViewer       sviewer;
1349   PetscViewerFormat format;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1353   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1354   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1355   if (iascii) {
1356     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1357     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1358       MatInfo   info;
1359       PetscBool inodes;
1360 
1361       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1362       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1363       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1364       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
1365       if (!inodes) {
1366         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
1367                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1368       } else {
1369         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
1370                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
1371       }
1372       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1373       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1374       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1375       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1376       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1377       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
1378       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1379       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1380       PetscFunctionReturn(0);
1381     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1382       PetscInt inodecount,inodelimit,*inodes;
1383       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1384       if (inodes) {
1385         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1386       } else {
1387         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1388       }
1389       PetscFunctionReturn(0);
1390     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1391       PetscFunctionReturn(0);
1392     }
1393   } else if (isbinary) {
1394     if (size == 1) {
1395       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1396       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1397     } else {
1398       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1399     }
1400     PetscFunctionReturn(0);
1401   } else if (isdraw) {
1402     PetscDraw draw;
1403     PetscBool isnull;
1404     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1405     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1406   }
1407 
1408   {
1409     /* assemble the entire matrix onto first processor. */
1410     Mat        A;
1411     Mat_SeqAIJ *Aloc;
1412     PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
1413     MatScalar  *a;
1414 
1415     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1416     if (!rank) {
1417       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1418     } else {
1419       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1420     }
1421     /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
1422     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
1423     ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
1424     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1425     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1426 
1427     /* copy over the A part */
1428     Aloc = (Mat_SeqAIJ*)aij->A->data;
1429     m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1430     row  = mat->rmap->rstart;
1431     for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
1432     for (i=0; i<m; i++) {
1433       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
1434       row++;
1435       a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
1436     }
1437     aj = Aloc->j;
1438     for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;
1439 
1440     /* copy over the B part */
1441     Aloc = (Mat_SeqAIJ*)aij->B->data;
1442     m    = aij->B->rmap->n;  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1443     row  = mat->rmap->rstart;
1444     ierr = PetscMalloc1((ai[m]+1),&cols);CHKERRQ(ierr);
1445     ct   = cols;
1446     for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
1447     for (i=0; i<m; i++) {
1448       ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
1449       row++;
1450       a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
1451     }
1452     ierr = PetscFree(ct);CHKERRQ(ierr);
1453     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1454     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455     /*
1456        Everyone has to call to draw the matrix since the graphics waits are
1457        synchronized across all processors that share the PetscDraw object
1458     */
1459     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1460     if (!rank) {
1461       ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1462     }
1463     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1464     ierr = MatDestroy(&A);CHKERRQ(ierr);
1465   }
1466   PetscFunctionReturn(0);
1467 }
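/*
   A small usage sketch for the viewer above (A is assumed to be an assembled
   MATMPIAIJ matrix; PETSC_VIEWER_STDOUT_WORLD selects the ASCII path):

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/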
1468 
1469 #undef __FUNCT__
1470 #define __FUNCT__ "MatView_MPIAIJ"
1471 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1472 {
1473   PetscErrorCode ierr;
1474   PetscBool      iascii,isdraw,issocket,isbinary;
1475 
1476   PetscFunctionBegin;
1477   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1478   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1479   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1480   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1481   if (iascii || isdraw || isbinary || issocket) {
1482     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1483   }
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatSOR_MPIAIJ"
1489 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1490 {
1491   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1492   PetscErrorCode ierr;
1493   Vec            bb1 = 0;
1494   PetscBool      hasop;
1495 
1496   PetscFunctionBegin;
1497   if (flag == SOR_APPLY_UPPER) {
1498     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1499     PetscFunctionReturn(0);
1500   }
1501 
1502   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS) || flag & SOR_EISENSTAT) {
1503     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1504   }
1505 
1506   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1507     if (flag & SOR_ZERO_INITIAL_GUESS) {
1508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1509       its--;
1510     }
1511 
1512     while (its--) {
1513       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1514       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1515 
1516       /* update rhs: bb1 = bb - B*x */
1517       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1518       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1519 
1520       /* local sweep */
1521       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1522     }
1523   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1524     if (flag & SOR_ZERO_INITIAL_GUESS) {
1525       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1526       its--;
1527     }
1528     while (its--) {
1529       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1531 
1532       /* update rhs: bb1 = bb - B*x */
1533       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1534       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1535 
1536       /* local sweep */
1537       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1538     }
1539   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1540     if (flag & SOR_ZERO_INITIAL_GUESS) {
1541       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1542       its--;
1543     }
1544     while (its--) {
1545       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1546       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547 
1548       /* update rhs: bb1 = bb - B*x */
1549       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1550       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1551 
1552       /* local sweep */
1553       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1554     }
1555   } else if (flag & SOR_EISENSTAT) {
1556     Vec xx1;
1557 
1558     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1559     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1560 
1561     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1562     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1563     if (!mat->diag) {
1564       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1565       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1566     }
1567     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1568     if (hasop) {
1569       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1570     } else {
1571       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1572     }
1573     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1574 
1575     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1576 
1577     /* local sweep */
1578     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1579     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1580     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1581   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1582 
1583   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1584   PetscFunctionReturn(0);
1585 }
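/*
   The routine above implements processor-local SOR: each sweep scatters the
   current iterate into mat->lvec, forms the updated right-hand side
   bb1 = bb - B*x_off, and smooths with the on-diagonal block A, so the global
   method is block Jacobi with SOR inside each block.  A usage sketch (pc is
   assumed to be the PC of a KSP operating on this matrix):

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/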
1586 
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatPermute_MPIAIJ"
1589 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1590 {
1591   Mat            aA,aB,Aperm;
1592   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1593   PetscScalar    *aa,*ba;
1594   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1595   PetscSF        rowsf,sf;
1596   IS             parcolp = NULL;
1597   PetscBool      done;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1602   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1603   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1604   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1605 
1606   /* Invert row permutation to find out where my rows should go */
1607   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1608   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1609   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1610   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1611   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1612   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1613 
1614   /* Invert column permutation to find out where my columns should go */
1615   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1616   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1617   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1618   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1619   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1620   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1621   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1622 
1623   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1624   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1625   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1626 
1627   /* Find out where my gcols should go */
1628   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1629   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1630   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1631   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1632   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1633   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1634   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1635   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1636 
1637   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1638   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1639   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1640   for (i=0; i<m; i++) {
1641     PetscInt row = rdest[i],rowner;
1642     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1643     for (j=ai[i]; j<ai[i+1]; j++) {
1644       PetscInt cowner,col = cdest[aj[j]];
1645       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1646       if (rowner == cowner) dnnz[i]++;
1647       else onnz[i]++;
1648     }
1649     for (j=bi[i]; j<bi[i+1]; j++) {
1650       PetscInt cowner,col = gcdest[bj[j]];
1651       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1652       if (rowner == cowner) dnnz[i]++;
1653       else onnz[i]++;
1654     }
1655   }
1656   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1657   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1658   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1659   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1660   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1661 
1662   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1663   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1664   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1665   for (i=0; i<m; i++) {
1666     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1667     PetscInt j0,rowlen;
1668     rowlen = ai[i+1] - ai[i];
1669     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the size of the repurposed buffers, so insert in batches of at most m */
1670       for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1671       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1672     }
1673     rowlen = bi[i+1] - bi[i];
1674     for (j0=j=0; j<rowlen; j0=j) {
1675       for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1676       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1677     }
1678   }
1679   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1682   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1683   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1684   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1685   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1686   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1687   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1688   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1689   *B = Aperm;
1690   PetscFunctionReturn(0);
1691 }
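/*
   Usage sketch for the permutation above (rowp and colp are index sets giving,
   for each locally owned row/column, the global position it should move to;
   they must share the matrix communicator):

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/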
1692 
1693 #undef __FUNCT__
1694 #define __FUNCT__ "MatGetInfo_MPIAIJ"
1695 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1696 {
1697   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1698   Mat            A    = mat->A,B = mat->B;
1699   PetscErrorCode ierr;
1700   PetscReal      isend[5],irecv[5];
1701 
1702   PetscFunctionBegin;
1703   info->block_size = 1.0;
1704   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1705 
1706   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1707   isend[3] = info->memory;  isend[4] = info->mallocs;
1708 
1709   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1710 
1711   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1712   isend[3] += info->memory;  isend[4] += info->mallocs;
1713   if (flag == MAT_LOCAL) {
1714     info->nz_used      = isend[0];
1715     info->nz_allocated = isend[1];
1716     info->nz_unneeded  = isend[2];
1717     info->memory       = isend[3];
1718     info->mallocs      = isend[4];
1719   } else if (flag == MAT_GLOBAL_MAX) {
1720     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1721 
1722     info->nz_used      = irecv[0];
1723     info->nz_allocated = irecv[1];
1724     info->nz_unneeded  = irecv[2];
1725     info->memory       = irecv[3];
1726     info->mallocs      = irecv[4];
1727   } else if (flag == MAT_GLOBAL_SUM) {
1728     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1729 
1730     info->nz_used      = irecv[0];
1731     info->nz_allocated = irecv[1];
1732     info->nz_unneeded  = irecv[2];
1733     info->memory       = irecv[3];
1734     info->mallocs      = irecv[4];
1735   }
1736   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1737   info->fill_ratio_needed = 0;
1738   info->factor_mallocs    = 0;
1739   PetscFunctionReturn(0);
1740 }
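/*
   Sketch of querying the statistics assembled above (comm is assumed to be the
   matrix communicator; MatInfo fields are PetscLogDouble):

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(comm,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/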
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetOption_MPIAIJ"
1744 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1745 {
1746   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1747   PetscErrorCode ierr;
1748 
1749   PetscFunctionBegin;
1750   switch (op) {
1751   case MAT_NEW_NONZERO_LOCATIONS:
1752   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1753   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1754   case MAT_KEEP_NONZERO_PATTERN:
1755   case MAT_NEW_NONZERO_LOCATION_ERR:
1756   case MAT_USE_INODES:
1757   case MAT_IGNORE_ZERO_ENTRIES:
1758     MatCheckPreallocated(A,1);
1759     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1760     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1761     break;
1762   case MAT_ROW_ORIENTED:
1763     a->roworiented = flg;
1764 
1765     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1766     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1767     break;
1768   case MAT_NEW_DIAGONALS:
1769     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1770     break;
1771   case MAT_IGNORE_OFF_PROC_ENTRIES:
1772     a->donotstash = flg;
1773     break;
1774   case MAT_SPD:
1775     A->spd_set = PETSC_TRUE;
1776     A->spd     = flg;
1777     if (flg) {
1778       A->symmetric                  = PETSC_TRUE;
1779       A->structurally_symmetric     = PETSC_TRUE;
1780       A->symmetric_set              = PETSC_TRUE;
1781       A->structurally_symmetric_set = PETSC_TRUE;
1782     }
1783     break;
1784   case MAT_SYMMETRIC:
1785     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1786     break;
1787   case MAT_STRUCTURALLY_SYMMETRIC:
1788     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1789     break;
1790   case MAT_HERMITIAN:
1791     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1792     break;
1793   case MAT_SYMMETRY_ETERNAL:
1794     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1795     break;
1796   default:
1797     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1798   }
1799   PetscFunctionReturn(0);
1800 }
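/*
   Usage sketch: marking a matrix SPD, which (per the MAT_SPD case above) also
   sets the symmetry flags, and skipping the stash for off-process entries:

     ierr = MatSetOption(A,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/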
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatGetRow_MPIAIJ"
1804 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1805 {
1806   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1807   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1808   PetscErrorCode ierr;
1809   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1810   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1811   PetscInt       *cmap,*idx_p;
1812 
1813   PetscFunctionBegin;
1814   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1815   mat->getrowactive = PETSC_TRUE;
1816 
1817   if (!mat->rowvalues && (idx || v)) {
1818     /*
1819         allocate enough space to hold information from the longest row.
1820     */
1821     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1822     PetscInt   max = 1,tmp;
1823     for (i=0; i<matin->rmap->n; i++) {
1824       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1825       if (max < tmp) max = tmp;
1826     }
1827     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1828   }
1829 
1830   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1831   lrow = row - rstart;
1832 
1833   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1834   if (!v)   {pvA = 0; pvB = 0;}
1835   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1836   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1837   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1838   nztot = nzA + nzB;
1839 
1840   cmap = mat->garray;
1841   if (v  || idx) {
1842     if (nztot) {
1843       /* Sort by increasing column numbers, assuming A and B already sorted */
1844       PetscInt imark = -1;
1845       if (v) {
1846         *v = v_p = mat->rowvalues;
1847         for (i=0; i<nzB; i++) {
1848           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1849           else break;
1850         }
1851         imark = i;
1852         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1853         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1854       }
1855       if (idx) {
1856         *idx = idx_p = mat->rowindices;
1857         if (imark > -1) {
1858           for (i=0; i<imark; i++) {
1859             idx_p[i] = cmap[cworkB[i]];
1860           }
1861         } else {
1862           for (i=0; i<nzB; i++) {
1863             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1864             else break;
1865           }
1866           imark = i;
1867         }
1868         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1869         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1870       }
1871     } else {
1872       if (idx) *idx = 0;
1873       if (v)   *v   = 0;
1874     }
1875   }
1876   *nz  = nztot;
1877   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1878   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1879   PetscFunctionReturn(0);
1880 }
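/*
   Sketch of the standard calling sequence served by the routine above; only
   locally owned rows may be requested, and each MatGetRow() must be paired
   with MatRestoreRow() before the next row is fetched:

     PetscInt          row,ncols,rstart,rend;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ...use cols[] and vals[]...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/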
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatRestoreRow_MPIAIJ"
1884 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1885 {
1886   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1887 
1888   PetscFunctionBegin;
1889   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1890   aij->getrowactive = PETSC_FALSE;
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatNorm_MPIAIJ"
1896 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1897 {
1898   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1899   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1900   PetscErrorCode ierr;
1901   PetscInt       i,j,cstart = mat->cmap->rstart;
1902   PetscReal      sum = 0.0;
1903   MatScalar      *v;
1904 
1905   PetscFunctionBegin;
1906   if (aij->size == 1) {
1907     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1908   } else {
1909     if (type == NORM_FROBENIUS) {
1910       v = amat->a;
1911       for (i=0; i<amat->nz; i++) {
1912         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1913       }
1914       v = bmat->a;
1915       for (i=0; i<bmat->nz; i++) {
1916         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1917       }
1918       ierr  = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1919       *norm = PetscSqrtReal(*norm);
1920     } else if (type == NORM_1) { /* max column norm */
1921       PetscReal *tmp,*tmp2;
1922       PetscInt  *jj,*garray = aij->garray;
1923       ierr  = PetscCalloc1((mat->cmap->N+1),&tmp);CHKERRQ(ierr);
1924       ierr  = PetscMalloc1((mat->cmap->N+1),&tmp2);CHKERRQ(ierr);
1925       *norm = 0.0;
1926       v     = amat->a; jj = amat->j;
1927       for (j=0; j<amat->nz; j++) {
1928         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1929       }
1930       v = bmat->a; jj = bmat->j;
1931       for (j=0; j<bmat->nz; j++) {
1932         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1933       }
1934       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1935       for (j=0; j<mat->cmap->N; j++) {
1936         if (tmp2[j] > *norm) *norm = tmp2[j];
1937       }
1938       ierr = PetscFree(tmp);CHKERRQ(ierr);
1939       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1940     } else if (type == NORM_INFINITY) { /* max row norm */
1941       PetscReal ntemp = 0.0;
1942       for (j=0; j<aij->A->rmap->n; j++) {
1943         v   = amat->a + amat->i[j];
1944         sum = 0.0;
1945         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1946           sum += PetscAbsScalar(*v); v++;
1947         }
1948         v = bmat->a + bmat->i[j];
1949         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1950           sum += PetscAbsScalar(*v); v++;
1951         }
1952         if (sum > ntemp) ntemp = sum;
1953       }
1954       ierr = MPI_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1955     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1956   }
1957   PetscFunctionReturn(0);
1958 }
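/*
   The norms computed above are the Frobenius norm sqrt(sum_ij |a_ij|^2), the
   1-norm max_j sum_i |a_ij| (largest column sum), and the infinity norm
   max_i sum_j |a_ij| (largest row sum).  Usage sketch:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/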
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatTranspose_MPIAIJ"
1962 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1963 {
1964   Mat_MPIAIJ     *a   = (Mat_MPIAIJ*)A->data;
1965   Mat_SeqAIJ     *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data;
1966   PetscErrorCode ierr;
1967   PetscInt       M      = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i;
1968   PetscInt       cstart = A->cmap->rstart,ncol;
1969   Mat            B;
1970   MatScalar      *array;
1971 
1972   PetscFunctionBegin;
1973   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1974 
1975   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1976   ai = Aloc->i; aj = Aloc->j;
1977   bi = Bloc->i; bj = Bloc->j;
1978   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1979     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1980     PetscSFNode          *oloc;
1981     PETSC_UNUSED PetscSF sf;
1982 
1983     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1984     /* compute d_nnz for preallocation */
1985     ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1986     for (i=0; i<ai[ma]; i++) {
1987       d_nnz[aj[i]]++;
1988       aj[i] += cstart; /* global col index to be used by MatSetValues() */
1989     }
1990     /* compute local off-diagonal contributions */
1991     ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr);
1992     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1993     /* map those to global */
1994     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1995     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1996     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1997     ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr);
1998     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1999     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
2000     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
2001 
2002     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2003     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
2004     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
2005     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
2006     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2007     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
2008   } else {
2009     B    = *matout;
2010     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2011     for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */
2012   }
2013 
2014   /* copy over the A part */
2015   array = Aloc->a;
2016   row   = A->rmap->rstart;
2017   for (i=0; i<ma; i++) {
2018     ncol = ai[i+1]-ai[i];
2019     ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2020     row++;
2021     array += ncol; aj += ncol;
2022   }
2023   aj = Aloc->j;
2024   for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */
2025 
2026   /* copy over the B part */
2027   ierr  = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr);
2028   array = Bloc->a;
2029   row   = A->rmap->rstart;
2030   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
2031   cols_tmp = cols;
2032   for (i=0; i<mb; i++) {
2033     ncol = bi[i+1]-bi[i];
2034     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr);
2035     row++;
2036     array += ncol; cols_tmp += ncol;
2037   }
2038   ierr = PetscFree(cols);CHKERRQ(ierr);
2039 
2040   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2041   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2042   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
2043     *matout = B;
2044   } else {
2045     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
2046   }
2047   PetscFunctionReturn(0);
2048 }
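/*
   Usage sketch for the transpose above; MAT_INITIAL_MATRIX creates At = A^T,
   while MAT_REUSE_MATRIX with *matout == A performs the in-place (square) case:

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
*/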
2049 
2050 #undef __FUNCT__
2051 #define __FUNCT__ "MatDiagonalScale_MPIAIJ"
2052 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
2053 {
2054   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2055   Mat            a    = aij->A,b = aij->B;
2056   PetscErrorCode ierr;
2057   PetscInt       s1,s2,s3;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
2061   if (rr) {
2062     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
2063     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
2064     /* Overlap communication with computation. */
2065     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2066   }
2067   if (ll) {
2068     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
2069     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
2070     ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr);
2071   }
2072   /* scale the diagonal block */
2073   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
2074 
2075   if (rr) {
2076     /* Do a scatter end and then right scale the off-diagonal block */
2077     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2078     ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
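/*
   The routine above computes mat = diag(ll)*mat*diag(rr), scattering rr so the
   off-diagonal block can be column scaled.  Usage sketch (ll and rr conform to
   the row and column layouts respectively; either may be NULL):

     ierr = MatDiagonalScale(mat,ll,rr);CHKERRQ(ierr);
*/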
2082 
2083 #undef __FUNCT__
2084 #define __FUNCT__ "MatSetUnfactored_MPIAIJ"
2085 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
2086 {
2087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2088   PetscErrorCode ierr;
2089 
2090   PetscFunctionBegin;
2091   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 #undef __FUNCT__
2096 #define __FUNCT__ "MatEqual_MPIAIJ"
2097 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2098 {
2099   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2100   Mat            a,b,c,d;
2101   PetscBool      flg;
2102   PetscErrorCode ierr;
2103 
2104   PetscFunctionBegin;
2105   a = matA->A; b = matA->B;
2106   c = matB->A; d = matB->B;
2107 
2108   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2109   if (flg) {
2110     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2111   }
2112   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatCopy_MPIAIJ"
2118 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2119 {
2120   PetscErrorCode ierr;
2121   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2122   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2123 
2124   PetscFunctionBegin;
2125   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2126   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2127     /* Because of the column compression in the off-processor part of the matrix a->B,
2128        the number of columns in a->B and b->B may differ, so MatCopy() cannot be called
2129        directly on the two parts. If need be, a copy more efficient than MatCopy_Basic()
2130        could be provided by first uncompressing the a->B matrices and then copying the
2131        submatrices */
2132     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2133   } else {
2134     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2135     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2136   }
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSetUp_MPIAIJ"
2142 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2143 {
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   ierr =  MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 /*
2152    Computes the number of nonzeros per row needed for preallocation when X and Y
2153    have different nonzero structure.
2154 */
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private"
2157 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2158 {
2159   PetscInt       i,j,k,nzx,nzy;
2160 
2161   PetscFunctionBegin;
2162   /* Set the number of nonzeros in the new matrix */
2163   for (i=0; i<m; i++) {
2164     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2165     nzx = xi[i+1] - xi[i];
2166     nzy = yi[i+1] - yi[i];
2167     nnz[i] = 0;
2168     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2169       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2170       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2171       nnz[i]++;
2172     }
2173     for (; k<nzy; k++) nnz[i]++;
2174   }
2175   PetscFunctionReturn(0);
2176 }
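/*
   A worked example of the merge above: suppose row i of X has global columns
   {1,4} and row i of Y has {2,4,7}.  Column 1 contributes 1; before reaching
   column 4 the catch-up loop passes Y's column 2 (+1); the shared column 4 is
   skipped in Y but still counted once for X (+1); the trailing loop adds Y's
   column 7 (+1).  Thus nnz[i] = 4 = |{1,2,4,7}|, the size of the union of the
   two sparsity patterns.
*/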
2177 
2178 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ"
2181 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2182 {
2183   PetscErrorCode ierr;
2184   PetscInt       m = Y->rmap->N;
2185   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2186   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2187 
2188   PetscFunctionBegin;
2189   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatAXPY_MPIAIJ"
2195 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2196 {
2197   PetscErrorCode ierr;
2198   PetscInt       i;
2199   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2200   PetscBLASInt   bnz,one=1;
2201   Mat_SeqAIJ     *x,*y;
2202 
2203   PetscFunctionBegin;
2204   if (str == SAME_NONZERO_PATTERN) {
2205     PetscScalar alpha = a;
2206     x    = (Mat_SeqAIJ*)xx->A->data;
2207     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2208     y    = (Mat_SeqAIJ*)yy->A->data;
2209     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2210     x    = (Mat_SeqAIJ*)xx->B->data;
2211     y    = (Mat_SeqAIJ*)yy->B->data;
2212     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2213     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2214     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2215   } else if (str == SUBSET_NONZERO_PATTERN) {
2216     ierr = MatAXPY_SeqAIJ(yy->A,a,xx->A,str);CHKERRQ(ierr);
2217 
2218     x = (Mat_SeqAIJ*)xx->B->data;
2219     y = (Mat_SeqAIJ*)yy->B->data;
2220     if (y->xtoy && y->XtoY != xx->B) {
2221       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2222       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2223     }
2224     if (!y->xtoy) { /* get xtoy */
2225       ierr    = MatAXPYGetxtoy_Private(xx->B->rmap->n,x->i,x->j,xx->garray,y->i,y->j,yy->garray,&y->xtoy);CHKERRQ(ierr);
2226       y->XtoY = xx->B;
2227       ierr    = PetscObjectReference((PetscObject)xx->B);CHKERRQ(ierr);
2228     }
2229     for (i=0; i<x->nz; i++) y->a[y->xtoy[i]] += a*(x->a[i]);
2230     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2231   } else {
2232     Mat      B;
2233     PetscInt *nnz_d,*nnz_o;
2234     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2235     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2236     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2237     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2238     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2239     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2240     ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2241     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2242     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2243     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2244     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2245     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2246     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2247     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2248   }
2249   PetscFunctionReturn(0);
2250 }
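/*
   Usage sketch: Y <- a*X + Y.  SAME_NONZERO_PATTERN uses the fast BLAS path
   above, SUBSET_NONZERO_PATTERN maps X's entries into Y, and any other pattern
   rebuilds Y with the merged preallocation:

     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/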
2251 
2252 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatConjugate_MPIAIJ"
2256 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2257 {
2258 #if defined(PETSC_USE_COMPLEX)
2259   PetscErrorCode ierr;
2260   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2261 
2262   PetscFunctionBegin;
2263   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2264   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2265 #else
2266   PetscFunctionBegin;
2267 #endif
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatRealPart_MPIAIJ"
2273 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2274 {
2275   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2276   PetscErrorCode ierr;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2280   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatImaginaryPart_MPIAIJ"
2286 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2287 {
2288   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2293   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #if defined(PETSC_HAVE_PBGL)
2298 
2299 #include <boost/parallel/mpi/bsp_process_group.hpp>
2300 #include <boost/graph/distributed/ilu_default_graph.hpp>
2301 #include <boost/graph/distributed/ilu_0_block.hpp>
2302 #include <boost/graph/distributed/ilu_preconditioner.hpp>
2303 #include <boost/graph/distributed/petsc/interface.hpp>
2304 #include <boost/multi_array.hpp>
2305 #include <boost/parallel/distributed_property_map.hpp>
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatILUFactorSymbolic_MPIAIJ"
2309 /*
2310   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2311 */
2312 PetscErrorCode MatILUFactorSymbolic_MPIAIJ(Mat fact,Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
2313 {
2314   namespace petsc = boost::distributed::petsc;
2315 
2316   namespace graph_dist = boost::graph::distributed;
2317   using boost::graph::distributed::ilu_default::process_group_type;
2318   using boost::graph::ilu_permuted;
2319 
2320   PetscBool      row_identity, col_identity;
2321   PetscContainer c;
2322   PetscInt       m, n, M, N;
2323   PetscErrorCode ierr;
2324 
2325   PetscFunctionBegin;
2326   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for parallel ilu");
2327   ierr = ISIdentity(isrow, &row_identity);CHKERRQ(ierr);
2328   ierr = ISIdentity(iscol, &col_identity);CHKERRQ(ierr);
2329   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for parallel ILU");
2330 
2331   process_group_type pg;
2332   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2333   lgraph_type  *lgraph_p   = new lgraph_type(petsc::num_global_vertices(A), pg, petsc::matrix_distribution(A, pg));
2334   lgraph_type& level_graph = *lgraph_p;
2335   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2336 
2337   petsc::read_matrix(A, graph, get(boost::edge_weight, graph));
2338   ilu_permuted(level_graph);
2339 
2340   /* put together the new matrix */
2341   ierr = MatCreate(PetscObjectComm((PetscObject)A), fact);CHKERRQ(ierr);
2342   ierr = MatGetLocalSize(A, &m, &n);CHKERRQ(ierr);
2343   ierr = MatGetSize(A, &M, &N);CHKERRQ(ierr);
2344   ierr = MatSetSizes(fact, m, n, M, N);CHKERRQ(ierr);
2345   ierr = MatSetBlockSizesFromMats(fact,A,A);CHKERRQ(ierr);
2346   ierr = MatSetType(fact, ((PetscObject)A)->type_name);CHKERRQ(ierr);
2347   ierr = MatAssemblyBegin(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2348   ierr = MatAssemblyEnd(fact, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2349 
2350   ierr = PetscContainerCreate(PetscObjectComm((PetscObject)A), &c);CHKERRQ(ierr);
2351   ierr = PetscContainerSetPointer(c, lgraph_p);CHKERRQ(ierr);
2352   ierr = PetscObjectCompose((PetscObject) (fact), "graph", (PetscObject) c);CHKERRQ(ierr);
2353   ierr = PetscContainerDestroy(&c);CHKERRQ(ierr);
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #undef __FUNCT__
2358 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ"
2359 PetscErrorCode MatLUFactorNumeric_MPIAIJ(Mat B,Mat A, const MatFactorInfo *info)
2360 {
2361   PetscFunctionBegin;
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatSolve_MPIAIJ"
2367 /*
2368   This uses the parallel ILU factorization of Peter Gottschling <pgottsch@osl.iu.edu>
2369 */
2370 PetscErrorCode MatSolve_MPIAIJ(Mat A, Vec b, Vec x)
2371 {
2372   namespace graph_dist = boost::graph::distributed;
2373 
2374   typedef graph_dist::ilu_default::ilu_level_graph_type lgraph_type;
2375   lgraph_type    *lgraph_p;
2376   PetscContainer c;
2377   PetscErrorCode ierr;
2378 
2379   PetscFunctionBegin;
2380   ierr = PetscObjectQuery((PetscObject) A, "graph", (PetscObject*) &c);CHKERRQ(ierr);
2381   ierr = PetscContainerGetPointer(c, (void**) &lgraph_p);CHKERRQ(ierr);
2382   ierr = VecCopy(b, x);CHKERRQ(ierr);
2383 
2384   PetscScalar *array_x;
2385   ierr = VecGetArray(x, &array_x);CHKERRQ(ierr);
2386   PetscInt sx;
2387   ierr = VecGetSize(x, &sx);CHKERRQ(ierr);
2388 
2389   PetscScalar *array_b;
2390   ierr = VecGetArray(b, &array_b);CHKERRQ(ierr);
2391   PetscInt sb;
2392   ierr = VecGetSize(b, &sb);CHKERRQ(ierr);
2393 
2394   lgraph_type& level_graph = *lgraph_p;
2395   graph_dist::ilu_default::graph_type&            graph(level_graph.graph);
2396 
2397   typedef boost::multi_array_ref<PetscScalar, 1> array_ref_type;
2398   array_ref_type                                 ref_b(array_b, boost::extents[num_vertices(graph)]);
2399   array_ref_type                                 ref_x(array_x, boost::extents[num_vertices(graph)]);
2400 
2401   typedef boost::iterator_property_map<array_ref_type::iterator,
2402                                        boost::property_map<graph_dist::ilu_default::graph_type, boost::vertex_index_t>::type>  gvector_type;
2403   gvector_type                                   vector_b(ref_b.begin(), get(boost::vertex_index, graph));
2404   gvector_type                                   vector_x(ref_x.begin(), get(boost::vertex_index, graph));
2405 
2406   ilu_set_solve(*lgraph_p, vector_b, vector_x);
2407   PetscFunctionReturn(0);
2408 }
2409 #endif
2410 
2411 
2412 #undef __FUNCT__
2413 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ_interlaced"
2414 PetscErrorCode MatGetRedundantMatrix_MPIAIJ_interlaced(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2415 {
2416   PetscMPIInt    rank,size;
2417   MPI_Comm       comm;
2418   PetscErrorCode ierr;
2419   PetscInt       nsends=0,nrecvs=0,i,rownz_max=0,M=mat->rmap->N,N=mat->cmap->N;
2420   PetscMPIInt    *send_rank= NULL,*recv_rank=NULL,subrank,subsize;
2421   PetscInt       *rowrange = mat->rmap->range;
2422   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2423   Mat            A = aij->A,B=aij->B,C=*matredundant;
2424   Mat_SeqAIJ     *a = (Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ*)B->data;
2425   PetscScalar    *sbuf_a;
2426   PetscInt       nzlocal=a->nz+b->nz;
2427   PetscInt       j,cstart=mat->cmap->rstart,cend=mat->cmap->rend,row,nzA,nzB,ncols,*cworkA,*cworkB;
2428   PetscInt       rstart=mat->rmap->rstart,rend=mat->rmap->rend,*bmap=aij->garray;
2429   PetscInt       *cols,ctmp,lwrite,*rptr,l,*sbuf_j;
2430   MatScalar      *aworkA,*aworkB;
2431   PetscScalar    *vals;
2432   PetscMPIInt    tag1,tag2,tag3,imdex;
2433   MPI_Request    *s_waits1=NULL,*s_waits2=NULL,*s_waits3=NULL;
2434   MPI_Request    *r_waits1=NULL,*r_waits2=NULL,*r_waits3=NULL;
2435   MPI_Status     recv_status,*send_status;
2436   PetscInt       *sbuf_nz=NULL,*rbuf_nz=NULL,count;
2437   PetscInt       **rbuf_j=NULL;
2438   PetscScalar    **rbuf_a=NULL;
2439   Mat_Redundant  *redund =NULL;
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2443   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2444   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2445   ierr = MPI_Comm_rank(subcomm,&subrank);CHKERRQ(ierr);
2446   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2447 
2448   if (reuse == MAT_REUSE_MATRIX) {
2449     if (M != mat->rmap->N || N != mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong global size");
2450     if (subsize == 1) {
2451       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2452       redund = c->redundant;
2453     } else {
2454       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2455       redund = c->redundant;
2456     }
2457     if (nzlocal != redund->nzlocal) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Cannot reuse matrix. Wrong nzlocal");
2458 
2459     nsends    = redund->nsends;
2460     nrecvs    = redund->nrecvs;
2461     send_rank = redund->send_rank;
2462     recv_rank = redund->recv_rank;
2463     sbuf_nz   = redund->sbuf_nz;
2464     rbuf_nz   = redund->rbuf_nz;
2465     sbuf_j    = redund->sbuf_j;
2466     sbuf_a    = redund->sbuf_a;
2467     rbuf_j    = redund->rbuf_j;
2468     rbuf_a    = redund->rbuf_a;
2469   }
2470 
2471   if (reuse == MAT_INITIAL_MATRIX) {
2472     PetscInt    nleftover,np_subcomm;
2473 
2474     /* get the destination processors' id send_rank, nsends and nrecvs */
2475     ierr = PetscMalloc2(size,&send_rank,size,&recv_rank);CHKERRQ(ierr);
2476 
2477     np_subcomm = size/nsubcomm;
2478     nleftover  = size - nsubcomm*np_subcomm;
2479 
2480     /* the block of code below is specific to the INTERLACED subcommunicator layout */
2481     /* -----------------------------------------------------------------------------*/
2482     nsends = 0; nrecvs = 0;
2483     for (i=0; i<size; i++) {
2484       if (subrank == i/nsubcomm && i != rank) { /* my_subrank == other's subrank */
2485         send_rank[nsends++] = i;
2486         recv_rank[nrecvs++] = i;
2487       }
2488     }
2489     if (rank >= size - nleftover) { /* this proc is a leftover processor */
2490       i = size-nleftover-1;
2491       j = 0;
2492       while (j < nsubcomm - nleftover) {
2493         send_rank[nsends++] = i;
2494         i--; j++;
2495       }
2496     }
2497 
2498     if (nleftover && subsize == size/nsubcomm && subrank==subsize-1) { /* this proc recvs from leftover processors */
2499       for (i=0; i<nleftover; i++) {
2500         recv_rank[nrecvs++] = size-nleftover+i;
2501       }
2502     }
2503     /*----------------------------------------------*/
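    /*
       A worked example of the interlaced pairing above (an illustration, not
       code): with size = 6 and nsubcomm = 2, subcomm 0 holds global ranks
       {0,2,4} and subcomm 1 holds {1,3,5}; a process with subrank s exchanges
       with every global rank i satisfying i/nsubcomm == s, so global rank 2
       (subrank 1) sends to and receives from global rank 3, which owns the
       same row block in the other subcommunicator.
    */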
2504 
2505     /* allocate sbuf_j, sbuf_a */
2506     i    = nzlocal + rowrange[rank+1] - rowrange[rank] + 2;
2507     ierr = PetscMalloc1(i,&sbuf_j);CHKERRQ(ierr);
2508     ierr = PetscMalloc1((nzlocal+1),&sbuf_a);CHKERRQ(ierr);
2509     /*
2510     ierr = PetscSynchronizedPrintf(comm,"[%d] nsends %d, nrecvs %d\n",rank,nsends,nrecvs);CHKERRQ(ierr);
2511     ierr = PetscSynchronizedFlush(comm,PETSC_STDOUT);CHKERRQ(ierr);
2512      */
2513   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2514 
2515   /* copy mat's local entries into the buffers */
2516   if (reuse == MAT_INITIAL_MATRIX) {
2517     rownz_max = 0;
2518     rptr      = sbuf_j;
2519     cols      = sbuf_j + rend-rstart + 1;
2520     vals      = sbuf_a;
2521     rptr[0]   = 0;
2522     for (i=0; i<rend-rstart; i++) {
2523       row    = i + rstart;
2524       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2525       ncols  = nzA + nzB;
2526       cworkA = a->j + a->i[i]; cworkB = b->j + b->i[i];
2527       aworkA = a->a + a->i[i]; aworkB = b->a + b->i[i];
2528       /* load the column indices for this row into cols */
2529       lwrite = 0;
2530       for (l=0; l<nzB; l++) {
2531         if ((ctmp = bmap[cworkB[l]]) < cstart) {
2532           vals[lwrite]   = aworkB[l];
2533           cols[lwrite++] = ctmp;
2534         }
2535       }
2536       for (l=0; l<nzA; l++) {
2537         vals[lwrite]   = aworkA[l];
2538         cols[lwrite++] = cstart + cworkA[l];
2539       }
2540       for (l=0; l<nzB; l++) {
2541         if ((ctmp = bmap[cworkB[l]]) >= cend) {
2542           vals[lwrite]   = aworkB[l];
2543           cols[lwrite++] = ctmp;
2544         }
2545       }
2546       vals     += ncols;
2547       cols     += ncols;
2548       rptr[i+1] = rptr[i] + ncols;
2549       if (rownz_max < ncols) rownz_max = ncols;
2550     }
2551     if (rptr[rend-rstart] != a->nz + b->nz) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "rptr[%d] %d != %d + %d",rend-rstart,rptr[rend-rstart],a->nz,b->nz);
2552   } else { /* only copy matrix values into sbuf_a */
2553     rptr    = sbuf_j;
2554     vals    = sbuf_a;
2555     rptr[0] = 0;
2556     for (i=0; i<rend-rstart; i++) {
2557       row    = i + rstart;
2558       nzA    = a->i[i+1] - a->i[i]; nzB = b->i[i+1] - b->i[i];
2559       ncols  = nzA + nzB;
2560       cworkB = b->j + b->i[i];
2561       aworkA = a->a + a->i[i];
2562       aworkB = b->a + b->i[i];
2563       lwrite = 0;
2564       for (l=0; l<nzB; l++) {
2565         if ((ctmp = bmap[cworkB[l]]) < cstart) vals[lwrite++] = aworkB[l];
2566       }
2567       for (l=0; l<nzA; l++) vals[lwrite++] = aworkA[l];
2568       for (l=0; l<nzB; l++) {
2569         if ((ctmp = bmap[cworkB[l]]) >= cend) vals[lwrite++] = aworkB[l];
2570       }
2571       vals     += ncols;
2572       rptr[i+1] = rptr[i] + ncols;
2573     }
2574   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2575 
2576   /* send nzlocal to others, and recv other's nzlocal */
2577   /*--------------------------------------------------*/
2578   if (reuse == MAT_INITIAL_MATRIX) {
2579     ierr = PetscMalloc2(3*(nsends + nrecvs)+1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2580 
2581     s_waits2 = s_waits3 + nsends;
2582     s_waits1 = s_waits2 + nsends;
2583     r_waits1 = s_waits1 + nsends;
2584     r_waits2 = r_waits1 + nrecvs;
2585     r_waits3 = r_waits2 + nrecvs;
2586   } else {
2587     ierr = PetscMalloc2(nsends + nrecvs +1,&s_waits3,nsends+1,&send_status);CHKERRQ(ierr);
2588 
2589     r_waits3 = s_waits3 + nsends;
2590   }
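  /* s_waits3 is a single allocation carved into the send/recv request arrays for the
     three message rounds: nonzero counts (tag1), row pointers + column indices (tag2),
     and values (tag3); on reuse only the value round is repeated */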
2591 
2592   ierr = PetscObjectGetNewTag((PetscObject)mat,&tag3);CHKERRQ(ierr);
2593   if (reuse == MAT_INITIAL_MATRIX) {
2594     /* get new tags to keep the communication clean */
2595     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag1);CHKERRQ(ierr);
2596     ierr = PetscObjectGetNewTag((PetscObject)mat,&tag2);CHKERRQ(ierr);
2597     ierr = PetscMalloc4(nsends,&sbuf_nz,nrecvs,&rbuf_nz,nrecvs,&rbuf_j,nrecvs,&rbuf_a);CHKERRQ(ierr);
2598 
2599     /* post receives of other's nzlocal */
2600     for (i=0; i<nrecvs; i++) {
2601       ierr = MPI_Irecv(rbuf_nz+i,1,MPIU_INT,MPI_ANY_SOURCE,tag1,comm,r_waits1+i);CHKERRQ(ierr);
2602     }
2603     /* send nzlocal to others */
2604     for (i=0; i<nsends; i++) {
2605       sbuf_nz[i] = nzlocal;
2606       ierr       = MPI_Isend(sbuf_nz+i,1,MPIU_INT,send_rank[i],tag1,comm,s_waits1+i);CHKERRQ(ierr);
2607     }
2608     /* wait on receives of nzlocal; allocate space for rbuf_j, rbuf_a */
2609     count = nrecvs;
2610     while (count) {
2611       ierr = MPI_Waitany(nrecvs,r_waits1,&imdex,&recv_status);CHKERRQ(ierr);
2612 
2613       recv_rank[imdex] = recv_status.MPI_SOURCE;
2614       /* allocate rbuf_a and rbuf_j; then post receives of rbuf_j */
2615       ierr = PetscMalloc1((rbuf_nz[imdex]+1),&rbuf_a[imdex]);CHKERRQ(ierr);
2616 
2617       i = rowrange[recv_status.MPI_SOURCE+1] - rowrange[recv_status.MPI_SOURCE]; /* number of expected mat->i */
2618 
2619       rbuf_nz[imdex] += i + 2;
2620 
2621       ierr = PetscMalloc1(rbuf_nz[imdex],&rbuf_j[imdex]);CHKERRQ(ierr);
2622       ierr = MPI_Irecv(rbuf_j[imdex],rbuf_nz[imdex],MPIU_INT,recv_status.MPI_SOURCE,tag2,comm,r_waits2+imdex);CHKERRQ(ierr);
2623       count--;
2624     }
2625     /* wait on sends of nzlocal */
2626     if (nsends) {ierr = MPI_Waitall(nsends,s_waits1,send_status);CHKERRQ(ierr);}
2627     /* send mat->i,j to others, and recv from others */
2628     /*------------------------------------------------*/
2629     for (i=0; i<nsends; i++) {
2630       j    = nzlocal + rowrange[rank+1] - rowrange[rank] + 1;
2631       ierr = MPI_Isend(sbuf_j,j,MPIU_INT,send_rank[i],tag2,comm,s_waits2+i);CHKERRQ(ierr);
2632     }
2633     /* wait on receives of mat->i,j */
2634     /*------------------------------*/
2635     count = nrecvs;
2636     while (count) {
2637       ierr = MPI_Waitany(nrecvs,r_waits2,&imdex,&recv_status);CHKERRQ(ierr);
2638       if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2639       count--;
2640     }
2641     /* wait on sends of mat->i,j */
2642     /*---------------------------*/
2643     if (nsends) {
2644       ierr = MPI_Waitall(nsends,s_waits2,send_status);CHKERRQ(ierr);
2645     }
2646   } /* end of if (reuse == MAT_INITIAL_MATRIX) */
2647 
2648   /* post receives, send and receive mat->a */
2649   /*----------------------------------------*/
2650   for (imdex=0; imdex<nrecvs; imdex++) {
2651     ierr = MPI_Irecv(rbuf_a[imdex],rbuf_nz[imdex],MPIU_SCALAR,recv_rank[imdex],tag3,comm,r_waits3+imdex);CHKERRQ(ierr);
2652   }
2653   for (i=0; i<nsends; i++) {
2654     ierr = MPI_Isend(sbuf_a,nzlocal,MPIU_SCALAR,send_rank[i],tag3,comm,s_waits3+i);CHKERRQ(ierr);
2655   }
2656   count = nrecvs;
2657   while (count) {
2658     ierr = MPI_Waitany(nrecvs,r_waits3,&imdex,&recv_status);CHKERRQ(ierr);
2659     if (recv_rank[imdex] != recv_status.MPI_SOURCE) SETERRQ2(PETSC_COMM_SELF,1, "recv_rank %d != MPI_SOURCE %d",recv_rank[imdex],recv_status.MPI_SOURCE);
2660     count--;
2661   }
2662   if (nsends) {
2663     ierr = MPI_Waitall(nsends,s_waits3,send_status);CHKERRQ(ierr);
2664   }
2665 
2666   ierr = PetscFree2(s_waits3,send_status);CHKERRQ(ierr);
2667 
2668   /* create redundant matrix */
2669   /*-------------------------*/
2670   if (reuse == MAT_INITIAL_MATRIX) {
2671     const PetscInt *range;
2672     PetscInt       rstart_sub,rend_sub,mloc_sub;
2673 
2674     /* compute rownz_max for preallocation */
2675     for (imdex=0; imdex<nrecvs; imdex++) {
2676       j    = rowrange[recv_rank[imdex]+1] - rowrange[recv_rank[imdex]];
2677       rptr = rbuf_j[imdex];
2678       for (i=0; i<j; i++) {
2679         ncols = rptr[i+1] - rptr[i];
2680         if (rownz_max < ncols) rownz_max = ncols;
2681       }
2682     }
2683 
2684     ierr = MatCreate(subcomm,&C);CHKERRQ(ierr);
2685 
2686     /* get local size of redundant matrix
2687        - mloc_sub is chosen for PETSC_SUBCOMM_INTERLACED; it works for other types, but may not be efficient! */
2688     ierr = MatGetOwnershipRanges(mat,&range);CHKERRQ(ierr);
2689     rstart_sub = range[nsubcomm*subrank];
2690     if (subrank+1 < subsize) { /* not the last proc in subcomm */
2691       rend_sub = range[nsubcomm*(subrank+1)];
2692     } else {
2693       rend_sub = mat->rmap->N;
2694     }
2695     mloc_sub = rend_sub - rstart_sub;
2696 
2697     if (M == N) {
2698       ierr = MatSetSizes(C,mloc_sub,mloc_sub,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
2699     } else { /* non-square matrix */
2700       ierr = MatSetSizes(C,mloc_sub,PETSC_DECIDE,PETSC_DECIDE,mat->cmap->N);CHKERRQ(ierr);
2701     }
2702     ierr = MatSetBlockSizesFromMats(C,mat,mat);CHKERRQ(ierr);
2703     ierr = MatSetFromOptions(C);CHKERRQ(ierr);
2704     ierr = MatSeqAIJSetPreallocation(C,rownz_max,NULL);CHKERRQ(ierr);
2705     ierr = MatMPIAIJSetPreallocation(C,rownz_max,NULL,rownz_max,NULL);CHKERRQ(ierr);
2706   } else {
2707     C = *matredundant;
2708   }
2709 
2710   /* insert local matrix entries */
2711   rptr = sbuf_j;
2712   cols = sbuf_j + rend-rstart + 1;
2713   vals = sbuf_a;
2714   for (i=0; i<rend-rstart; i++) {
2715     row   = i + rstart;
2716     ncols = rptr[i+1] - rptr[i];
2717     ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2718     vals += ncols;
2719     cols += ncols;
2720   }
2721   /* insert received matrix entries */
2722   for (imdex=0; imdex<nrecvs; imdex++) {
2723     rstart = rowrange[recv_rank[imdex]];
2724     rend   = rowrange[recv_rank[imdex]+1];
2725     /* printf("[%d] insert rows %d - %d\n",rank,rstart,rend-1); */
2726     rptr   = rbuf_j[imdex];
2727     cols   = rbuf_j[imdex] + rend-rstart + 1;
2728     vals   = rbuf_a[imdex];
2729     for (i=0; i<rend-rstart; i++) {
2730       row   = i + rstart;
2731       ncols = rptr[i+1] - rptr[i];
2732       ierr  = MatSetValues(C,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
2733       vals += ncols;
2734       cols += ncols;
2735     }
2736   }
2737   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2738   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2739 
2740   if (reuse == MAT_INITIAL_MATRIX) {
2741     *matredundant = C;
2742 
2743     /* create a supporting struct and attach it to C for reuse */
2744     ierr = PetscNewLog(C,&redund);CHKERRQ(ierr);
2745     if (subsize == 1) {
2746       Mat_SeqAIJ *c = (Mat_SeqAIJ*)C->data;
2747       c->redundant = redund;
2748     } else {
2749       Mat_MPIAIJ *c = (Mat_MPIAIJ*)C->data;
2750       c->redundant = redund;
2751     }
2752 
2753     redund->nzlocal   = nzlocal;
2754     redund->nsends    = nsends;
2755     redund->nrecvs    = nrecvs;
2756     redund->send_rank = send_rank;
2757     redund->recv_rank = recv_rank;
2758     redund->sbuf_nz   = sbuf_nz;
2759     redund->rbuf_nz   = rbuf_nz;
2760     redund->sbuf_j    = sbuf_j;
2761     redund->sbuf_a    = sbuf_a;
2762     redund->rbuf_j    = rbuf_j;
2763     redund->rbuf_a    = rbuf_a;
2764     redund->psubcomm  = NULL;
2765   }
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatGetRedundantMatrix_MPIAIJ"
2771 PetscErrorCode MatGetRedundantMatrix_MPIAIJ(Mat mat,PetscInt nsubcomm,MPI_Comm subcomm,MatReuse reuse,Mat *matredundant)
2772 {
2773   PetscErrorCode ierr;
2774   MPI_Comm       comm;
2775   PetscMPIInt    size,subsize;
2776   PetscInt       mloc_sub,rstart,rend,M=mat->rmap->N,N=mat->cmap->N;
2777   Mat_Redundant  *redund=NULL;
2778   PetscSubcomm   psubcomm=NULL;
2779   MPI_Comm       subcomm_in=subcomm;
2780   Mat            *matseq;
2781   IS             isrow,iscol;
2782 
2783   PetscFunctionBegin;
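  /* If the caller supplies no subcomm, build a PetscSubcomm here; the INTERLACED
     layout is handled by the specialized routine above, while every other case falls
     through to the MatGetSubMatrices()-based path below. */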
2784   if (subcomm_in == MPI_COMM_NULL) { /* user does not provide subcomm */
2785     if (reuse ==  MAT_INITIAL_MATRIX) {
2786       /* create psubcomm, then get subcomm */
2787       ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2788       ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2789       if (nsubcomm < 1 || nsubcomm > size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"nsubcomm must be between 1 and %D",size);
2790 
2791       ierr = PetscSubcommCreate(comm,&psubcomm);CHKERRQ(ierr);
2792       ierr = PetscSubcommSetNumber(psubcomm,nsubcomm);CHKERRQ(ierr);
2793       ierr = PetscSubcommSetType(psubcomm,PETSC_SUBCOMM_CONTIGUOUS);CHKERRQ(ierr);
2794       ierr = PetscSubcommSetFromOptions(psubcomm);CHKERRQ(ierr);
2795       subcomm = psubcomm->comm;
2796     } else { /* retrieve psubcomm and subcomm */
2797       ierr = PetscObjectGetComm((PetscObject)(*matredundant),&subcomm);CHKERRQ(ierr);
2798       ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2799       if (subsize == 1) {
2800         Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2801         redund = c->redundant;
2802       } else {
2803         Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2804         redund = c->redundant;
2805       }
2806       psubcomm = redund->psubcomm;
2807     }
2808     if (psubcomm->type == PETSC_SUBCOMM_INTERLACED) {
2809       ierr = MatGetRedundantMatrix_MPIAIJ_interlaced(mat,nsubcomm,subcomm,reuse,matredundant);CHKERRQ(ierr);
2810       if (reuse ==  MAT_INITIAL_MATRIX) { /* psubcomm is created in this routine, free it in MatDestroy_Redundant() */
2811         ierr = MPI_Comm_size(psubcomm->comm,&subsize);CHKERRQ(ierr);
2812         if (subsize == 1) {
2813           Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2814           c->redundant->psubcomm = psubcomm;
2815         } else {
2816           Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2817           c->redundant->psubcomm = psubcomm;
2818         }
2819       }
2820       PetscFunctionReturn(0);
2821     }
2822   }
2823 
2824   /* use MPI subcomm via MatGetSubMatrices(); use subcomm_in or psubcomm->comm (psubcomm->type != INTERLACED) */
2825   ierr = MPI_Comm_size(subcomm,&subsize);CHKERRQ(ierr);
2826   if (reuse == MAT_INITIAL_MATRIX) {
2827     /* create a local sequential matrix matseq[0] */
2828     mloc_sub = PETSC_DECIDE;
2829     ierr = PetscSplitOwnership(subcomm,&mloc_sub,&M);CHKERRQ(ierr);
2830     ierr = MPI_Scan(&mloc_sub,&rend,1,MPIU_INT,MPI_SUM,subcomm);CHKERRQ(ierr);
2831     rstart = rend - mloc_sub;
2832     ierr = ISCreateStride(PETSC_COMM_SELF,mloc_sub,rstart,1,&isrow);CHKERRQ(ierr);
2833     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol);CHKERRQ(ierr);
2834   } else { /* reuse == MAT_REUSE_MATRIX */
2835     if (subsize == 1) {
2836       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2837       redund = c->redundant;
2838     } else {
2839       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2840       redund = c->redundant;
2841     }
2842 
2843     isrow  = redund->isrow;
2844     iscol  = redund->iscol;
2845     matseq = redund->matseq;
2846   }
2847   ierr = MatGetSubMatrices(mat,1,&isrow,&iscol,reuse,&matseq);CHKERRQ(ierr);
2848   ierr = MatCreateMPIAIJConcatenateSeqAIJ(subcomm,matseq[0],PETSC_DECIDE,reuse,matredundant);CHKERRQ(ierr);
2849 
2850   if (reuse == MAT_INITIAL_MATRIX) {
2851     /* create a supporting struct and attach it to C for reuse */
2852     ierr = PetscNewLog(*matredundant,&redund);CHKERRQ(ierr);
2853     if (subsize == 1) {
2854       Mat_SeqAIJ *c = (Mat_SeqAIJ*)(*matredundant)->data;
2855       c->redundant = redund;
2856     } else {
2857       Mat_MPIAIJ *c = (Mat_MPIAIJ*)(*matredundant)->data;
2858       c->redundant = redund;
2859     }
2860     redund->isrow    = isrow;
2861     redund->iscol    = iscol;
2862     redund->matseq   = matseq;
2863     redund->psubcomm = psubcomm;
2864   }
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 #undef __FUNCT__
2869 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ"
2870 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2871 {
2872   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2873   PetscErrorCode ierr;
2874   PetscInt       i,*idxb = 0;
2875   PetscScalar    *va,*vb;
2876   Vec            vtmp;
2877 
2878   PetscFunctionBegin;
2879   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2880   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2881   if (idx) {
2882     for (i=0; i<A->rmap->n; i++) {
2883       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2884     }
2885   }
2886 
2887   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2888   if (idx) {
2889     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2890   }
2891   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2892   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2893 
2894   for (i=0; i<A->rmap->n; i++) {
2895     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2896       va[i] = vb[i];
2897       if (idx) idx[i] = a->garray[idxb[i]];
2898     }
2899   }
2900 
2901   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2902   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2903   ierr = PetscFree(idxb);CHKERRQ(ierr);
2904   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2905   PetscFunctionReturn(0);
2906 }
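
/* A minimal driver sketch for the row-max-abs interface (hypothetical names; assumes
   A is an assembled MATMPIAIJ, error checking with ierr/CHKERRQ omitted):

     Vec      rmax;
     PetscInt m,*idx;
     MatGetLocalSize(A,&m,NULL);
     VecCreateMPI(PetscObjectComm((PetscObject)A),m,PETSC_DETERMINE,&rmax);
     PetscMalloc1(m,&idx);
     MatGetRowMaxAbs(A,rmax,idx);

   After the call, rmax[i] = max_j |A(i,j)| and idx[i] is the global column where
   that maximum occurs. */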
2907 
2908 #undef __FUNCT__
2909 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ"
2910 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2911 {
2912   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2913   PetscErrorCode ierr;
2914   PetscInt       i,*idxb = 0;
2915   PetscScalar    *va,*vb;
2916   Vec            vtmp;
2917 
2918   PetscFunctionBegin;
2919   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2920   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2921   if (idx) {
2922     for (i=0; i<A->rmap->n; i++) {
2923       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2924     }
2925   }
2926 
2927   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2928   if (idx) {
2929     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2930   }
2931   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2932   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2933 
2934   for (i=0; i<A->rmap->n; i++) {
2935     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2936       va[i] = vb[i];
2937       if (idx) idx[i] = a->garray[idxb[i]];
2938     }
2939   }
2940 
2941   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2942   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2943   ierr = PetscFree(idxb);CHKERRQ(ierr);
2944   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2945   PetscFunctionReturn(0);
2946 }
2947 
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatGetRowMin_MPIAIJ"
2950 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2951 {
2952   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2953   PetscInt       n      = A->rmap->n;
2954   PetscInt       cstart = A->cmap->rstart;
2955   PetscInt       *cmap  = mat->garray;
2956   PetscInt       *diagIdx, *offdiagIdx;
2957   Vec            diagV, offdiagV;
2958   PetscScalar    *a, *diagA, *offdiagA;
2959   PetscInt       r;
2960   PetscErrorCode ierr;
2961 
2962   PetscFunctionBegin;
2963   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2964   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2965   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2966   ierr = MatGetRowMin(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
2967   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2968   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
2969   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
2970   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2971   for (r = 0; r < n; ++r) {
2972     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2973       a[r]   = diagA[r];
2974       idx[r] = cstart + diagIdx[r];
2975     } else {
2976       a[r]   = offdiagA[r];
2977       idx[r] = cmap[offdiagIdx[r]];
2978     }
2979   }
2980   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
2981   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
2982   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2983   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2984   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2985   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2986   PetscFunctionReturn(0);
2987 }
2988 
2989 #undef __FUNCT__
2990 #define __FUNCT__ "MatGetRowMax_MPIAIJ"
2991 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2992 {
2993   Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
2994   PetscInt       n      = A->rmap->n;
2995   PetscInt       cstart = A->cmap->rstart;
2996   PetscInt       *cmap  = mat->garray;
2997   PetscInt       *diagIdx, *offdiagIdx;
2998   Vec            diagV, offdiagV;
2999   PetscScalar    *a, *diagA, *offdiagA;
3000   PetscInt       r;
3001   PetscErrorCode ierr;
3002 
3003   PetscFunctionBegin;
3004   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
3005   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
3006   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
3007   ierr = MatGetRowMax(mat->A, diagV,    diagIdx);CHKERRQ(ierr);
3008   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
3009   ierr = VecGetArray(v,        &a);CHKERRQ(ierr);
3010   ierr = VecGetArray(diagV,    &diagA);CHKERRQ(ierr);
3011   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3012   for (r = 0; r < n; ++r) {
3013     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
3014       a[r]   = diagA[r];
3015       idx[r] = cstart + diagIdx[r];
3016     } else {
3017       a[r]   = offdiagA[r];
3018       idx[r] = cmap[offdiagIdx[r]];
3019     }
3020   }
3021   ierr = VecRestoreArray(v,        &a);CHKERRQ(ierr);
3022   ierr = VecRestoreArray(diagV,    &diagA);CHKERRQ(ierr);
3023   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
3024   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
3025   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
3026   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ"
3032 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
3033 {
3034   PetscErrorCode ierr;
3035   Mat            *dummy;
3036 
3037   PetscFunctionBegin;
3038   ierr    = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
3039   *newmat = *dummy;
3040   ierr    = PetscFree(dummy);CHKERRQ(ierr);
3041   PetscFunctionReturn(0);
3042 }
3043 
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ"
3046 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
3047 {
3048   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
3049   PetscErrorCode ierr;
3050 
3051   PetscFunctionBegin;
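  /* the point-block diagonal of the parallel matrix lies entirely inside the local
     diagonal block a->A, so the sequential routine does all the work */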
3052   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatSetRandom_MPIAIJ"
3058 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
3059 {
3060   PetscErrorCode ierr;
3061   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
3062 
3063   PetscFunctionBegin;
3064   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
3065   ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
3066   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 /* -------------------------------------------------------------------*/
3072 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
3073                                        MatGetRow_MPIAIJ,
3074                                        MatRestoreRow_MPIAIJ,
3075                                        MatMult_MPIAIJ,
3076                                 /* 4*/ MatMultAdd_MPIAIJ,
3077                                        MatMultTranspose_MPIAIJ,
3078                                        MatMultTransposeAdd_MPIAIJ,
3079 #if defined(PETSC_HAVE_PBGL)
3080                                        MatSolve_MPIAIJ,
3081 #else
3082                                        0,
3083 #endif
3084                                        0,
3085                                        0,
3086                                 /*10*/ 0,
3087                                        0,
3088                                        0,
3089                                        MatSOR_MPIAIJ,
3090                                        MatTranspose_MPIAIJ,
3091                                 /*15*/ MatGetInfo_MPIAIJ,
3092                                        MatEqual_MPIAIJ,
3093                                        MatGetDiagonal_MPIAIJ,
3094                                        MatDiagonalScale_MPIAIJ,
3095                                        MatNorm_MPIAIJ,
3096                                 /*20*/ MatAssemblyBegin_MPIAIJ,
3097                                        MatAssemblyEnd_MPIAIJ,
3098                                        MatSetOption_MPIAIJ,
3099                                        MatZeroEntries_MPIAIJ,
3100                                 /*24*/ MatZeroRows_MPIAIJ,
3101                                        0,
3102 #if defined(PETSC_HAVE_PBGL)
3103                                        0,
3104 #else
3105                                        0,
3106 #endif
3107                                        0,
3108                                        0,
3109                                 /*29*/ MatSetUp_MPIAIJ,
3110 #if defined(PETSC_HAVE_PBGL)
3111                                        0,
3112 #else
3113                                        0,
3114 #endif
3115                                        0,
3116                                        0,
3117                                        0,
3118                                 /*34*/ MatDuplicate_MPIAIJ,
3119                                        0,
3120                                        0,
3121                                        0,
3122                                        0,
3123                                 /*39*/ MatAXPY_MPIAIJ,
3124                                        MatGetSubMatrices_MPIAIJ,
3125                                        MatIncreaseOverlap_MPIAIJ,
3126                                        MatGetValues_MPIAIJ,
3127                                        MatCopy_MPIAIJ,
3128                                 /*44*/ MatGetRowMax_MPIAIJ,
3129                                        MatScale_MPIAIJ,
3130                                        0,
3131                                        MatDiagonalSet_MPIAIJ,
3132                                        MatZeroRowsColumns_MPIAIJ,
3133                                 /*49*/ MatSetRandom_MPIAIJ,
3134                                        0,
3135                                        0,
3136                                        0,
3137                                        0,
3138                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
3139                                        0,
3140                                        MatSetUnfactored_MPIAIJ,
3141                                        MatPermute_MPIAIJ,
3142                                        0,
3143                                 /*59*/ MatGetSubMatrix_MPIAIJ,
3144                                        MatDestroy_MPIAIJ,
3145                                        MatView_MPIAIJ,
3146                                        0,
3147                                        MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ,
3148                                 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ,
3149                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
3150                                        0,
3151                                        0,
3152                                        0,
3153                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
3154                                        MatGetRowMinAbs_MPIAIJ,
3155                                        0,
3156                                        MatSetColoring_MPIAIJ,
3157                                        0,
3158                                        MatSetValuesAdifor_MPIAIJ,
3159                                 /*75*/ MatFDColoringApply_AIJ,
3160                                        0,
3161                                        0,
3162                                        0,
3163                                        MatFindZeroDiagonals_MPIAIJ,
3164                                 /*80*/ 0,
3165                                        0,
3166                                        0,
3167                                 /*83*/ MatLoad_MPIAIJ,
3168                                        0,
3169                                        0,
3170                                        0,
3171                                        0,
3172                                        0,
3173                                 /*89*/ MatMatMult_MPIAIJ_MPIAIJ,
3174                                        MatMatMultSymbolic_MPIAIJ_MPIAIJ,
3175                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
3176                                        MatPtAP_MPIAIJ_MPIAIJ,
3177                                        MatPtAPSymbolic_MPIAIJ_MPIAIJ,
3178                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
3179                                        0,
3180                                        0,
3181                                        0,
3182                                        0,
3183                                 /*99*/ 0,
3184                                        0,
3185                                        0,
3186                                        MatConjugate_MPIAIJ,
3187                                        0,
3188                                 /*104*/MatSetValuesRow_MPIAIJ,
3189                                        MatRealPart_MPIAIJ,
3190                                        MatImaginaryPart_MPIAIJ,
3191                                        0,
3192                                        0,
3193                                 /*109*/0,
3194                                        MatGetRedundantMatrix_MPIAIJ,
3195                                        MatGetRowMin_MPIAIJ,
3196                                        0,
3197                                        0,
3198                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
3199                                        0,
3200                                        0,
3201                                        0,
3202                                        0,
3203                                 /*119*/0,
3204                                        0,
3205                                        0,
3206                                        0,
3207                                        MatGetMultiProcBlock_MPIAIJ,
3208                                 /*124*/MatFindNonzeroRows_MPIAIJ,
3209                                        MatGetColumnNorms_MPIAIJ,
3210                                        MatInvertBlockDiagonal_MPIAIJ,
3211                                        0,
3212                                        MatGetSubMatricesParallel_MPIAIJ,
3213                                 /*129*/0,
3214                                        MatTransposeMatMult_MPIAIJ_MPIAIJ,
3215                                        MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ,
3216                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
3217                                        0,
3218                                 /*134*/0,
3219                                        0,
3220                                        0,
3221                                        0,
3222                                        0,
3223                                 /*139*/0,
3224                                        0,
3225                                        0,
3226                                        MatFDColoringSetUp_MPIXAIJ
3227 };
3228 
3229 /* ----------------------------------------------------------------------------------------*/
3230 
3231 #undef __FUNCT__
3232 #define __FUNCT__ "MatStoreValues_MPIAIJ"
3233 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
3234 {
3235   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
3240   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatRetrieveValues_MPIAIJ"
3246 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
3247 {
3248   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
3249   PetscErrorCode ierr;
3250 
3251   PetscFunctionBegin;
3252   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
3253   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
3254   PetscFunctionReturn(0);
3255 }
3256 
3257 #undef __FUNCT__
3258 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ"
3259 PetscErrorCode  MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3260 {
3261   Mat_MPIAIJ     *b;
3262   PetscErrorCode ierr;
3263 
3264   PetscFunctionBegin;
3265   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3266   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3267   b = (Mat_MPIAIJ*)B->data;
3268 
3269   if (!B->preallocated) {
3270     /* Explicitly create 2 MATSEQAIJ matrices. */
3271     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3272     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3273     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
3274     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
3275     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
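    /* b->B is created with the full global column width; during assembly its column
       space is compacted to just the columns that contain nonzeros (recorded in garray) */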
3276     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3277     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3278     ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
3279     ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
3280     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3281   }
3282 
3283   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
3284   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
3285   B->preallocated = PETSC_TRUE;
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatDuplicate_MPIAIJ"
3291 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3292 {
3293   Mat            mat;
3294   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
3295   PetscErrorCode ierr;
3296 
3297   PetscFunctionBegin;
3298   *newmat = 0;
3299   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3300   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3301   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
3302   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3303   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3304   a       = (Mat_MPIAIJ*)mat->data;
3305 
3306   mat->factortype   = matin->factortype;
3307   mat->assembled    = PETSC_TRUE;
3308   mat->insertmode   = NOT_SET_VALUES;
3309   mat->preallocated = PETSC_TRUE;
3310 
3311   a->size         = oldmat->size;
3312   a->rank         = oldmat->rank;
3313   a->donotstash   = oldmat->donotstash;
3314   a->roworiented  = oldmat->roworiented;
3315   a->rowindices   = 0;
3316   a->rowvalues    = 0;
3317   a->getrowactive = PETSC_FALSE;
3318 
3319   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3320   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3321 
3322   if (oldmat->colmap) {
3323 #if defined(PETSC_USE_CTABLE)
3324     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3325 #else
3326     ierr = PetscMalloc1((mat->cmap->N),&a->colmap);CHKERRQ(ierr);
3327     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3328     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
3329 #endif
3330   } else a->colmap = 0;
3331   if (oldmat->garray) {
3332     PetscInt len;
3333     len  = oldmat->B->cmap->n;
3334     ierr = PetscMalloc1((len+1),&a->garray);CHKERRQ(ierr);
3335     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3336     if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); }
3337   } else a->garray = 0;
3338 
3339   ierr    = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3340   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3341   ierr    = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3342   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3343   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3344   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3345   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3346   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3347   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3348   *newmat = mat;
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatLoad_MPIAIJ"
3356 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
3357 {
3358   PetscScalar    *vals,*svals;
3359   MPI_Comm       comm;
3360   PetscErrorCode ierr;
3361   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
3362   PetscInt       i,nz,j,rstart,rend,mmax,maxnz = 0,grows,gcols;
3363   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
3364   PetscInt       *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols;
3365   PetscInt       cend,cstart,n,*rowners,sizesset=1;
3366   int            fd;
3367   PetscInt       bs = 1;
3368 
3369   PetscFunctionBegin;
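  /* Overall flow: rank 0 reads the header, row lengths, column indices, and values
     from the binary file and ships each slice to its owning rank with MPIULong_Send();
     every rank then inserts its own rows via MatSetValues_MPIAIJ(). */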
3370   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3371   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3372   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3373   if (!rank) {
3374     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3375     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3376     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3377   }
3378 
3379   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIAIJ matrix","Mat");CHKERRQ(ierr);
3380   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3381   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3382 
3383   if (newMat->rmap->n < 0 && newMat->rmap->N < 0 && newMat->cmap->n < 0 && newMat->cmap->N < 0) sizesset = 0;
3384 
3385   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3386   M    = header[1]; N = header[2];
3387   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3388   if (sizesset && newMat->rmap->N < 0) newMat->rmap->N = M;
3389   if (sizesset && newMat->cmap->N < 0) newMat->cmap->N = N;
3390 
3391   /* If global sizes are set, check if they are consistent with that given in the file */
3392   if (sizesset) {
3393     ierr = MatGetSize(newMat,&grows,&gcols);CHKERRQ(ierr);
3394   }
3395   if (sizesset && newMat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3396   if (sizesset && newMat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3397 
3398   /* determine ownership of all (block) rows */
3399   if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs);
3400   if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank));    /* PETSC_DECIDE */
3401   else m = newMat->rmap->n; /* Set by user */
3402 
3403   ierr = PetscMalloc1((size+1),&rowners);CHKERRQ(ierr);
3404   ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3405 
3406   /* First process needs enough room for process with most rows */
3407   if (!rank) {
3408     mmax = rowners[1];
3409     for (i=2; i<=size; i++) {
3410       mmax = PetscMax(mmax, rowners[i]);
3411     }
3412   } else mmax = -1;             /* unused, but compilers complain */
3413 
3414   rowners[0] = 0;
3415   for (i=2; i<=size; i++) {
3416     rowners[i] += rowners[i-1];
3417   }
3418   rstart = rowners[rank];
3419   rend   = rowners[rank+1];
3420 
3421   /* distribute row lengths to all processors */
3422   ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr);
3423   if (!rank) {
3424     ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr);
3425     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3426     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3427     for (j=0; j<m; j++) {
3428       procsnz[0] += ourlens[j];
3429     }
3430     for (i=1; i<size; i++) {
3431       ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr);
3432       /* calculate the number of nonzeros on each processor */
3433       for (j=0; j<rowners[i+1]-rowners[i]; j++) {
3434         procsnz[i] += rowlengths[j];
3435       }
3436       ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3437     }
3438     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3439   } else {
3440     ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3441   }
3442 
3443   if (!rank) {
3444     /* determine max buffer needed and allocate it */
3445     maxnz = 0;
3446     for (i=0; i<size; i++) {
3447       maxnz = PetscMax(maxnz,procsnz[i]);
3448     }
3449     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3450 
3451     /* read in my part of the matrix column indices  */
3452     nz   = procsnz[0];
3453     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3454     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3455 
3456     /* read in everyone else's and ship off */
3457     for (i=1; i<size; i++) {
3458       nz   = procsnz[i];
3459       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3460       ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3461     }
3462     ierr = PetscFree(cols);CHKERRQ(ierr);
3463   } else {
3464     /* determine buffer space needed for message */
3465     nz = 0;
3466     for (i=0; i<m; i++) {
3467       nz += ourlens[i];
3468     }
3469     ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr);
3470 
3471     /* receive message of column indices */
3472     ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr);
3473   }
3474 
3475   /* determine column ownership if matrix is not square */
3476   if (N != M) {
3477     if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank);
3478     else n = newMat->cmap->n;
3479     ierr   = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3480     cstart = cend - n;
3481   } else {
3482     cstart = rstart;
3483     cend   = rend;
3484     n      = cend - cstart;
3485   }
3486 
3487   /* loop over local rows, determining number of off diagonal entries */
3488   ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr);
3489   jj   = 0;
3490   for (i=0; i<m; i++) {
3491     for (j=0; j<ourlens[i]; j++) {
3492       if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++;
3493       jj++;
3494     }
3495   }
3496 
3497   for (i=0; i<m; i++) {
3498     ourlens[i] -= offlens[i];
3499   }
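  /* ourlens[] now holds only the diagonal-block counts needed for preallocation;
     the off-diagonal counts are added back below because the insertion loop needs
     the total row lengths again */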
3500   if (!sizesset) {
3501     ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr);
3502   }
3503 
3504   if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);}
3505 
3506   ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr);
3507 
3508   for (i=0; i<m; i++) {
3509     ourlens[i] += offlens[i];
3510   }
3511 
3512   if (!rank) {
3513     ierr = PetscMalloc1((maxnz+1),&vals);CHKERRQ(ierr);
3514 
3515     /* read in my part of the matrix numerical values  */
3516     nz   = procsnz[0];
3517     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3518 
3519     /* insert into matrix */
3520     jj      = rstart;
3521     smycols = mycols;
3522     svals   = vals;
3523     for (i=0; i<m; i++) {
3524       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3525       smycols += ourlens[i];
3526       svals   += ourlens[i];
3527       jj++;
3528     }
3529 
3530     /* read in other processors and ship out */
3531     for (i=1; i<size; i++) {
3532       nz   = procsnz[i];
3533       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3534       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3535     }
3536     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3537   } else {
3538     /* receive numeric values */
3539     ierr = PetscMalloc1((nz+1),&vals);CHKERRQ(ierr);
3540 
3541     /* receive message of values */
3542     ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr);
3543 
3544     /* insert into matrix */
3545     jj      = rstart;
3546     smycols = mycols;
3547     svals   = vals;
3548     for (i=0; i<m; i++) {
3549       ierr     = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr);
3550       smycols += ourlens[i];
3551       svals   += ourlens[i];
3552       jj++;
3553     }
3554   }
3555   ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr);
3556   ierr = PetscFree(vals);CHKERRQ(ierr);
3557   ierr = PetscFree(mycols);CHKERRQ(ierr);
3558   ierr = PetscFree(rowners);CHKERRQ(ierr);
3559   ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3560   ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3561   PetscFunctionReturn(0);
3562 }
3563 
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ"
3566 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3567 {
3568   PetscErrorCode ierr;
3569   IS             iscol_local;
3570   PetscInt       csize;
3571 
3572   PetscFunctionBegin;
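  /* the allgathered column IS is stashed on the submatrix with PetscObjectCompose()
     below, so a MAT_REUSE_MATRIX call can retrieve it instead of rebuilding it */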
3573   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3574   if (call == MAT_REUSE_MATRIX) {
3575     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3576     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3577   } else {
3578     PetscInt cbs;
3579     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3580     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3581     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3582   }
3583   ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3584   if (call == MAT_INITIAL_MATRIX) {
3585     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3586     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3587   }
3588   PetscFunctionReturn(0);
3589 }
3590 
3591 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*);
3592 #undef __FUNCT__
3593 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private"
3594 /*
3595     Not great since it makes two copies of the submatrix: first a SeqAIJ
3596   locally, and then the end result by concatenating the local matrices.
3597   Writing it directly would be much like MatGetSubMatrices_MPIAIJ()
3598 
3599   Note: This requires a sequential iscol with all indices.
3600 */
3601 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3602 {
3603   PetscErrorCode ierr;
3604   PetscMPIInt    rank,size;
3605   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3606   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol;
3607   PetscBool      allcolumns, colflag;
3608   Mat            M,Mreuse;
3609   MatScalar      *vwork,*aa;
3610   MPI_Comm       comm;
3611   Mat_SeqAIJ     *aij;
3612 
3613   PetscFunctionBegin;
3614   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3615   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3617 
3618   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3619   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
3620   if (colflag && ncol == mat->cmap->N) {
3621     allcolumns = PETSC_TRUE;
3622   } else {
3623     allcolumns = PETSC_FALSE;
3624   }
3625   if (call ==  MAT_REUSE_MATRIX) {
3626     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3627     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3628     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3629   } else {
3630     ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr);
3631   }
3632 
3633   /*
3634       m - number of local rows
3635       n - number of columns (same on all processors)
3636       rstart - first row in new global matrix generated
3637   */
3638   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3639   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3640   if (call == MAT_INITIAL_MATRIX) {
3641     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3642     ii  = aij->i;
3643     jj  = aij->j;
3644 
3645     /*
3646         Determine the number of non-zeros in the diagonal and off-diagonal
3647         portions of the matrix in order to do correct preallocation
3648     */
3649 
3650     /* first get start and end of "diagonal" columns */
3651     if (csize == PETSC_DECIDE) {
3652       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3653       if (mglobal == n) { /* square matrix */
3654         nlocal = m;
3655       } else {
3656         nlocal = n/size + ((n % size) > rank);
3657       }
3658     } else {
3659       nlocal = csize;
3660     }
3661     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3662     rstart = rend - nlocal;
3663     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3664 
3665     /* next, compute all the lengths */
3666     ierr  = PetscMalloc1((2*m+1),&dlens);CHKERRQ(ierr);
3667     olens = dlens + m;
3668     for (i=0; i<m; i++) {
3669       jend = ii[i+1] - ii[i];
3670       olen = 0;
3671       dlen = 0;
3672       for (j=0; j<jend; j++) {
3673         if (*jj < rstart || *jj >= rend) olen++;
3674         else dlen++;
3675         jj++;
3676       }
3677       olens[i] = olen;
3678       dlens[i] = dlen;
3679     }
3680     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3681     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3682     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3683     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3684     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3685     ierr = PetscFree(dlens);CHKERRQ(ierr);
3686   } else {
3687     PetscInt ml,nl;
3688 
3689     M    = *newmat;
3690     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3691     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3692     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3693     /*
3694          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3695        rather than the slower MatSetValues().
3696     */
3697     M->was_assembled = PETSC_TRUE;
3698     M->assembled     = PETSC_FALSE;
3699   }
3700   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3701   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3702   ii   = aij->i;
3703   jj   = aij->j;
3704   aa   = aij->a;
3705   for (i=0; i<m; i++) {
3706     row   = rstart + i;
3707     nz    = ii[i+1] - ii[i];
3708     cwork = jj;     jj += nz;
3709     vwork = aa;     aa += nz;
3710     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3711   }
3712 
3713   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3714   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3715   *newmat = M;
3716 
3717   /* save submatrix used in processor for next request */
3718   if (call ==  MAT_INITIAL_MATRIX) {
3719     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3720     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3721   }
3722   PetscFunctionReturn(0);
3723 }
3724 
3725 #undef __FUNCT__
3726 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ"
3727 PetscErrorCode  MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3728 {
3729   PetscInt       m,cstart, cend,j,nnz,i,d;
3730   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3731   const PetscInt *JJ;
3732   PetscScalar    *values;
3733   PetscErrorCode ierr;
3734 
3735   PetscFunctionBegin;
3736   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]);
3737 
3738   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3739   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3740   m      = B->rmap->n;
3741   cstart = B->cmap->rstart;
3742   cend   = B->cmap->rend;
3743   rstart = B->rmap->rstart;
3744 
3745   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3746 
3747 #if defined(PETSC_USE_DEBUG)
3748   for (i=0; i<m; i++) {
3749     nnz = Ii[i+1]- Ii[i];
3750     JJ  = J + Ii[i];
3751     if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz);
3752     if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i);
3753     if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3754   }
3755 #endif
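  /* count, for each local row, how many column indices fall inside the local
     diagonal block [cstart,cend) and how many fall outside it */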
3756 
3757   for (i=0; i<m; i++) {
3758     nnz     = Ii[i+1]- Ii[i];
3759     JJ      = J + Ii[i];
3760     nnz_max = PetscMax(nnz_max,nnz);
3761     d       = 0;
3762     for (j=0; j<nnz; j++) {
3763       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3764     }
3765     d_nnz[i] = d;
3766     o_nnz[i] = nnz - d;
3767   }
3768   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3769   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3770 
3771   if (v) values = (PetscScalar*)v;
3772   else {
3773     ierr = PetscCalloc1((nnz_max+1),&values);CHKERRQ(ierr);
3774   }
3775 
3776   for (i=0; i<m; i++) {
3777     ii   = i + rstart;
3778     nnz  = Ii[i+1]- Ii[i];
3779     ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
3780   }
3781   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3782   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3783 
3784   if (!v) {
3785     ierr = PetscFree(values);CHKERRQ(ierr);
3786   }
3787   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3788   PetscFunctionReturn(0);
3789 }
3790 
3791 #undef __FUNCT__
3792 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR"
3793 /*@
3794    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3795    (the default parallel PETSc format).
3796 
3797    Collective on MPI_Comm
3798 
3799    Input Parameters:
3800 +  B - the matrix
3801 .  i - the indices into j for the start of each local row (starts with zero)
3802 .  j - the column indices for each local row (starts with zero)
3803 -  v - optional values in the matrix
3804 
3805    Level: developer
3806 
3807    Notes:
3808        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3809      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3810      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3811 
3812        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3813 
3814        The format used for the sparse matrix input is equivalent to a
3815     row-major ordering, i.e., for the following matrix the expected input data is
3816     as shown:
3817 
3818         1 0 0
3819         2 0 3     P0
3820        -------
3821         4 5 6     P1
3822 
3823      Process0 [P0]: rows_owned=[0,1]
3824         i =  {0,1,3}  [size = nrow+1  = 2+1]
3825         j =  {0,0,2}  [size = nz = 3]
3826         v =  {1,2,3}  [size = nz = 3]
3827 
3828      Process1 [P1]: rows_owned=[2]
3829         i =  {0,3}    [size = nrow+1  = 1+1]
3830         j =  {0,1,2}  [size = nz = 3]
3831         v =  {4,5,6}  [size = nz = 3]
3832 
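     A minimal calling sketch, assuming i, j, and v are the local CSR arrays
     laid out as above (hypothetical names; error checking omitted):

.vb
     Mat B;
     MatCreate(comm,&B);
     MatSetSizes(B,nlocalrows,nlocalcols,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocationCSR(B,i,j,v);
.ve
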
3833 .keywords: matrix, aij, compressed row, sparse, parallel
3834 
3835 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ,
3836           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3837 @*/
3838 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3839 {
3840   PetscErrorCode ierr;
3841 
3842   PetscFunctionBegin;
3843   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3844   PetscFunctionReturn(0);
3845 }
3846 
3847 #undef __FUNCT__
3848 #define __FUNCT__ "MatMPIAIJSetPreallocation"
3849 /*@C
3850    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3851    (the default parallel PETSc format).  For good matrix assembly performance
3852    the user should preallocate the matrix storage by setting the parameters
3853    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3854    performance can be increased by more than a factor of 50.
3855 
3856    Collective on MPI_Comm
3857 
3858    Input Parameters:
3859 +  B - the matrix
3860 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3861            (same value is used for all local rows)
3862 .  d_nnz - array containing the number of nonzeros in the various rows of the
3863            DIAGONAL portion of the local submatrix (possibly different for each row)
3864            or NULL, if d_nz is used to specify the nonzero structure.
3865            The size of this array is equal to the number of local rows, i.e 'm'.
3866            For matrices that will be factored, you must leave room for (and set)
3867            the diagonal entry even if it is zero.
3868 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3869            submatrix (same value is used for all local rows).
3870 -  o_nnz - array containing the number of nonzeros in the various rows of the
3871            OFF-DIAGONAL portion of the local submatrix (possibly different for
3872            each row) or NULL, if o_nz is used to specify the nonzero
3873            structure. The size of this array is equal to the number
3874            of local rows, i.e 'm'.
3875 
3876    If the *_nnz parameter is given then the *_nz parameter is ignored
3877 
3878    The AIJ format (also called the Yale sparse matrix format or
3879    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3880    storage.  The stored row and column indices begin with zero.
3881    See Users-Manual: ch_mat for details.
3882 
3883    The parallel matrix is partitioned such that the first m0 rows belong to
3884    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3885    to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.
3886 
3887    The DIAGONAL portion of the local submatrix of a processor can be defined
3888    as the submatrix which is obtained by extracting the part corresponding to
3889    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3890    first row that belongs to the processor, r2 is the last row belonging to
3891    this processor, and c1-c2 is the range of indices of the local part of a
3892    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3893    common case of a square matrix, the row and column ranges are the same and
3894    the DIAGONAL part is also square. The remaining portion of the local
3895    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3896 
3897    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3898 
3899    You can call MatGetInfo() to get information on how effective the preallocation was;
3900    for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3901    You can also run with the option -info and look for messages with the string
3902    malloc in them to see if additional memory allocation was needed.
3903 
3904    Example usage:
3905 
3906    Consider the following 8x8 matrix with 34 non-zero values, that is
3907    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
3908    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
3909    as follows:
3910 
3911 .vb
3912             1  2  0  |  0  3  0  |  0  4
3913     Proc0   0  5  6  |  7  0  0  |  8  0
3914             9  0 10  | 11  0  0  | 12  0
3915     -------------------------------------
3916            13  0 14  | 15 16 17  |  0  0
3917     Proc1   0 18  0  | 19 20 21  |  0  0
3918             0  0  0  | 22 23  0  | 24  0
3919     -------------------------------------
3920     Proc2  25 26 27  |  0  0 28  | 29  0
3921            30  0  0  | 31 32 33  |  0 34
3922 .ve
3923 
3924    This can be represented as a collection of submatrices as:
3925 
3926 .vb
3927       A B C
3928       D E F
3929       G H I
3930 .ve
3931 
3932    Where the submatrices A,B,C are owned by proc0, D,E,F are
3933    owned by proc1, G,H,I are owned by proc2.
3934 
3935    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3936    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
3937    The 'M','N' parameters are 8,8, and have the same values on all procs.
3938 
3939    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
3940    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
3941    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
3942    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
3943    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
3944    matrix, and [DF] as another SeqAIJ matrix.
3945 
3946    When d_nz, o_nz parameters are specified, d_nz storage elements are
3947    allocated for every row of the local diagonal submatrix, and o_nz
3948    storage locations are allocated for every row of the OFF-DIAGONAL submat.
3949    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
3950    local row over each of the local DIAGONAL and OFF-DIAGONAL submatrices.
3951    In this case, the values of d_nz,o_nz are:
3952 .vb
3953      proc0 : d_nz = 2, o_nz = 2
3954      proc1 : d_nz = 3, o_nz = 2
3955      proc2 : d_nz = 1, o_nz = 4
3956 .ve
3957    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
3958    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
3959    for proc2, i.e. we are using 12+15+10=37 storage locations to store
3960    34 values.
3961 
3962    When d_nnz, o_nnz parameters are specified, the storage is specified
3963    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
3964    In the above case the values for d_nnz,o_nnz are:
3965 .vb
3966      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
3967      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
3968      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
3969 .ve
3970    Here the space allocated is the sum of all the above values, i.e. 34, and
3971    hence the preallocation is perfect.
3972 
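   A minimal sketch of the per-row variant on proc0 of the example above (B is
   assumed to have already been created with MatCreate(), sized with
   MatSetSizes(B,3,3,8,8), and given type MATMPIAIJ):
.vb
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
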
3973    Level: intermediate
3974 
3975 .keywords: matrix, aij, compressed row, sparse, parallel
3976 
3977 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
3978           MPIAIJ, MatGetInfo(), PetscSplitOwnership()
3979 @*/
3980 PetscErrorCode  MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3981 {
3982   PetscErrorCode ierr;
3983 
3984   PetscFunctionBegin;
3985   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3986   PetscValidType(B,1);
3987   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3988   PetscFunctionReturn(0);
3989 }
3990 
3991 #undef __FUNCT__
3992 #define __FUNCT__ "MatCreateMPIAIJWithArrays"
3993 /*@
3994      MatCreateMPIAIJWithArrays - creates an MPIAIJ matrix using arrays that contain the local
3995          rows in standard CSR format.
3996 
3997    Collective on MPI_Comm
3998 
3999    Input Parameters:
4000 +  comm - MPI communicator
4001 .  m - number of local rows (Cannot be PETSC_DECIDE)
4002 .  n - This value should be the same as the local size used in creating the
4003        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4004        calculated if N is given) For square matrices n is almost always m.
4005 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4006 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4007 .   i - row indices
4008 .   j - column indices
4009 -   a - matrix values
4010 
4011    Output Parameter:
4012 .   mat - the matrix
4013 
4014    Level: intermediate
4015 
4016    Notes:
4017        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4018      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4019      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4020 
4021        The i and j indices are 0 based, and the i indices are offsets into the local j array.
4022 
4023        The format used for the sparse matrix input is equivalent to a
4024     row-major ordering, i.e. for the following matrix, the input data expected is
4025     as shown:
4026 
4027         1 0 0
4028         2 0 3     P0
4029        -------
4030         4 5 6     P1
4031 
4032      Process0 [P0]: rows_owned=[0,1]
4033         i =  {0,1,3}  [size = nrow+1  = 2+1]
4034         j =  {0,0,2}  [size = nz = 3]
4035         a =  {1,2,3}  [size = nz = 3]
4036 
4037      Process1 [P1]: rows_owned=[2]
4038         i =  {0,3}    [size = nrow+1  = 1+1]
4039         j =  {0,1,2}  [size = nz = 3]
4040         a =  {4,5,6}  [size = nz = 3]
4041 
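     A minimal sketch of the call on P0 of the example above (comm is assumed
     to be the 2-process communicator of the example; P1 makes the same call
     with m = 1 and its own arrays):
.vb
      Mat         A;
      PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
      PetscScalar a[] = {1,2,3};
      ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,3,3,i,j,a,&A);CHKERRQ(ierr);
.ve
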
4042 .keywords: matrix, aij, compressed row, sparse, parallel
4043 
4044 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4045           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4046 @*/
4047 PetscErrorCode  MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4048 {
4049   PetscErrorCode ierr;
4050 
4051   PetscFunctionBegin;
4052   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4053   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4054   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4055   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4056   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4057   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4058   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4059   PetscFunctionReturn(0);
4060 }
4061 
4062 #undef __FUNCT__
4063 #define __FUNCT__ "MatCreateAIJ"
4064 /*@C
4065    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4066    (the default parallel PETSc format).  For good matrix assembly performance
4067    the user should preallocate the matrix storage by setting the parameters
4068    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4069    performance can be increased by more than a factor of 50.
4070 
4071    Collective on MPI_Comm
4072 
4073    Input Parameters:
4074 +  comm - MPI communicator
4075 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4076            This value should be the same as the local size used in creating the
4077            y vector for the matrix-vector product y = Ax.
4078 .  n - This value should be the same as the local size used in creating the
4079        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4080        calculated if N is given) For square matrices n is almost always m.
4081 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4082 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4083 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4084            (same value is used for all local rows)
4085 .  d_nnz - array containing the number of nonzeros in the various rows of the
4086            DIAGONAL portion of the local submatrix (possibly different for each row)
4087            or NULL, if d_nz is used to specify the nonzero structure.
4088            The size of this array is equal to the number of local rows, i.e 'm'.
4089 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4090            submatrix (same value is used for all local rows).
4091 -  o_nnz - array containing the number of nonzeros in the various rows of the
4092            OFF-DIAGONAL portion of the local submatrix (possibly different for
4093            each row) or NULL, if o_nz is used to specify the nonzero
4094            structure. The size of this array is equal to the number
4095            of local rows, i.e 'm'.
4096 
4097    Output Parameter:
4098 .  A - the matrix
4099 
4100    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4101    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4102    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4103 
4104    Notes:
4105    If the *_nnz parameter is given then the *_nz parameter is ignored
4106 
4107    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4108    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4109    storage requirements for this matrix.
4110 
4111    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4112    processor then it must be used on all processors that share the object for
4113    that argument.
4114 
4115    The user MUST specify either the local or global matrix dimensions
4116    (possibly both).
4117 
4118    The parallel matrix is partitioned across processors such that the
4119    first m0 rows belong to process 0, the next m1 rows belong to
4120    process 1, the next m2 rows belong to process 2, etc., where
4121    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4122    values corresponding to an [m x N] submatrix.
4123 
4124    The columns are logically partitioned with the n0 columns belonging
4125    to the 0th partition, the next n1 columns belonging to the next
4126    partition, etc., where n0,n1,n2,... are the input parameter 'n'.
4127 
4128    The DIAGONAL portion of the local submatrix on any given processor
4129    is the submatrix corresponding to the rows and columns m,n
4130    owned by the given processor, i.e. the diagonal matrix on
4131    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4132    etc. The remaining portion of the local submatrix [m x (N-n)]
4133    constitutes the OFF-DIAGONAL portion. The example below better
4134    illustrates this concept.
4135 
4136    For a square global matrix we define each processor's diagonal portion
4137    to be its local rows and the corresponding columns (a square submatrix);
4138    each processor's off-diagonal portion encompasses the remainder of the
4139    local matrix (a rectangular submatrix).
4140 
4141    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4142 
4143    When calling this routine with a single process communicator, a matrix of
4144    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4145    type of communicator, use the construction mechanism:
4146      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4147 
4148    By default, this format uses inodes (identical nodes) when possible.
4149    We search for consecutive rows with the same nonzero structure, thereby
4150    reusing matrix information to achieve increased efficiency.
4151 
4152    Options Database Keys:
4153 +  -mat_no_inode  - Do not use inodes
4154 .  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4155 -  -mat_aij_oneindex - Internally use indexing starting at 1
4156         rather than 0.  Note that when calling MatSetValues(),
4157         the user still MUST index entries starting at 0!
4158 
4159 
4160    Example usage:
4161 
4162    Consider the following 8x8 matrix with 34 non-zero values, that is
4163    assembled across 3 processors. Let us assume that proc0 owns 3 rows,
4164    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4165    as follows:
4166 
4167 .vb
4168             1  2  0  |  0  3  0  |  0  4
4169     Proc0   0  5  6  |  7  0  0  |  8  0
4170             9  0 10  | 11  0  0  | 12  0
4171     -------------------------------------
4172            13  0 14  | 15 16 17  |  0  0
4173     Proc1   0 18  0  | 19 20 21  |  0  0
4174             0  0  0  | 22 23  0  | 24  0
4175     -------------------------------------
4176     Proc2  25 26 27  |  0  0 28  | 29  0
4177            30  0  0  | 31 32 33  |  0 34
4178 .ve
4179 
4180    This can be represented as a collection of submatrices as:
4181 
4182 .vb
4183       A B C
4184       D E F
4185       G H I
4186 .ve
4187 
4188    Where the submatrices A,B,C are owned by proc0, D,E,F are
4189    owned by proc1, G,H,I are owned by proc2.
4190 
4191    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4192    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4193    The 'M','N' parameters are 8,8, and have the same values on all procs.
4194 
4195    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4196    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4197    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4198    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4199    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4200    matrix, and [DF] as another SeqAIJ matrix.
4201 
4202    When d_nz, o_nz parameters are specified, d_nz storage elements are
4203    allocated for every row of the local diagonal submatrix, and o_nz
4204    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4205    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4206    local row over each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4207    In this case, the values of d_nz,o_nz are:
4208 .vb
4209      proc0 : d_nz = 2, o_nz = 2
4210      proc1 : d_nz = 3, o_nz = 2
4211      proc2 : d_nz = 1, o_nz = 4
4212 .ve
4213    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4214    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4215    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4216    34 values.
4217 
4218    When d_nnz, o_nnz parameters are specified, the storage is specified
4219    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4220    In the above case the values for d_nnz,o_nnz are:
4221 .vb
4222      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4223      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4224      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4225 .ve
4226    Here the space allocated is the sum of all the above values, i.e. 34, and
4227    hence the preallocation is perfect.
4228 
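   A minimal sketch of creating the 8x8 example above from proc0's point of
   view (proc1 and proc2 make the same call with their own m, n, d_nnz, and o_nnz):
.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
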
4229    Level: intermediate
4230 
4231 .keywords: matrix, aij, compressed row, sparse, parallel
4232 
4233 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4234           MPIAIJ, MatCreateMPIAIJWithArrays()
4235 @*/
4236 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4237 {
4238   PetscErrorCode ierr;
4239   PetscMPIInt    size;
4240 
4241   PetscFunctionBegin;
4242   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4243   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4244   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4245   if (size > 1) {
4246     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4247     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4248   } else {
4249     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4250     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4251   }
4252   PetscFunctionReturn(0);
4253 }
4254 
4255 #undef __FUNCT__
4256 #define __FUNCT__ "MatMPIAIJGetSeqAIJ"
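/* Returns the diagonal block (Ad), the off-diagonal block (Ao), and the local-to-global column map used by the off-diagonal block (colmap); any of the outputs may be NULL */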
4257 PetscErrorCode  MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4258 {
4259   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
4260 
4261   PetscFunctionBegin;
4262   if (Ad)     *Ad     = a->A;
4263   if (Ao)     *Ao     = a->B;
4264   if (colmap) *colmap = a->garray;
4265   PetscFunctionReturn(0);
4266 }
4267 
4268 #undef __FUNCT__
4269 #define __FUNCT__ "MatSetColoring_MPIAIJ"
4270 PetscErrorCode MatSetColoring_MPIAIJ(Mat A,ISColoring coloring)
4271 {
4272   PetscErrorCode ierr;
4273   PetscInt       i;
4274   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4275 
4276   PetscFunctionBegin;
4277   if (coloring->ctype == IS_COLORING_GLOBAL) {
4278     ISColoringValue *allcolors,*colors;
4279     ISColoring      ocoloring;
4280 
4281     /* set coloring for diagonal portion */
4282     ierr = MatSetColoring_SeqAIJ(a->A,coloring);CHKERRQ(ierr);
4283 
4284     /* set coloring for off-diagonal portion */
4285     ierr = ISAllGatherColors(PetscObjectComm((PetscObject)A),coloring->n,coloring->colors,NULL,&allcolors);CHKERRQ(ierr);
4286     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4287     for (i=0; i<a->B->cmap->n; i++) {
4288       colors[i] = allcolors[a->garray[i]];
4289     }
4290     ierr = PetscFree(allcolors);CHKERRQ(ierr);
4291     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4292     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4293     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4294   } else if (coloring->ctype == IS_COLORING_GHOSTED) {
4295     ISColoringValue *colors;
4296     PetscInt        *larray;
4297     ISColoring      ocoloring;
4298 
4299     /* set coloring for diagonal portion */
4300     ierr = PetscMalloc1((a->A->cmap->n+1),&larray);CHKERRQ(ierr);
4301     for (i=0; i<a->A->cmap->n; i++) {
4302       larray[i] = i + A->cmap->rstart;
4303     }
4304     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->A->cmap->n,larray,NULL,larray);CHKERRQ(ierr);
4305     ierr = PetscMalloc1((a->A->cmap->n+1),&colors);CHKERRQ(ierr);
4306     for (i=0; i<a->A->cmap->n; i++) {
4307       colors[i] = coloring->colors[larray[i]];
4308     }
4309     ierr = PetscFree(larray);CHKERRQ(ierr);
4310     ierr = ISColoringCreate(PETSC_COMM_SELF,coloring->n,a->A->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4311     ierr = MatSetColoring_SeqAIJ(a->A,ocoloring);CHKERRQ(ierr);
4312     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4313 
4314     /* set coloring for off-diagonal portion */
4315     ierr = PetscMalloc1((a->B->cmap->n+1),&larray);CHKERRQ(ierr);
4316     ierr = ISGlobalToLocalMappingApply(A->cmap->mapping,IS_GTOLM_MASK,a->B->cmap->n,a->garray,NULL,larray);CHKERRQ(ierr);
4317     ierr = PetscMalloc1((a->B->cmap->n+1),&colors);CHKERRQ(ierr);
4318     for (i=0; i<a->B->cmap->n; i++) {
4319       colors[i] = coloring->colors[larray[i]];
4320     }
4321     ierr = PetscFree(larray);CHKERRQ(ierr);
4322     ierr = ISColoringCreate(MPI_COMM_SELF,coloring->n,a->B->cmap->n,colors,&ocoloring);CHKERRQ(ierr);
4323     ierr = MatSetColoring_SeqAIJ(a->B,ocoloring);CHKERRQ(ierr);
4324     ierr = ISColoringDestroy(&ocoloring);CHKERRQ(ierr);
4325   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support ISColoringType %d",(int)coloring->ctype);
4326   PetscFunctionReturn(0);
4327 }
4328 
4329 #undef __FUNCT__
4330 #define __FUNCT__ "MatSetValuesAdifor_MPIAIJ"
4331 PetscErrorCode MatSetValuesAdifor_MPIAIJ(Mat A,PetscInt nl,void *advalues)
4332 {
4333   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4334   PetscErrorCode ierr;
4335 
4336   PetscFunctionBegin;
4337   ierr = MatSetValuesAdifor_SeqAIJ(a->A,nl,advalues);CHKERRQ(ierr);
4338   ierr = MatSetValuesAdifor_SeqAIJ(a->B,nl,advalues);CHKERRQ(ierr);
4339   PetscFunctionReturn(0);
4340 }
4341 
4342 #undef __FUNCT__
4343 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJSymbolic"
4344 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJSymbolic(MPI_Comm comm,Mat inmat,PetscInt n,Mat *outmat)
4345 {
4346   PetscErrorCode ierr;
4347   PetscInt       m,N,i,rstart,nnz,*dnz,*onz,sum,bs,cbs;
4348   PetscInt       *indx;
4349 
4350   PetscFunctionBegin;
4351   /* This routine will ONLY return an MPIAIJ type matrix */
4352   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4353   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4354   if (n == PETSC_DECIDE) {
4355     ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4356   }
4357   /* Check sum(n) = N */
4358   ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4359   if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N);
4360 
4361   ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4362   rstart -= m;
4363 
4364   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4365   for (i=0; i<m; i++) {
4366     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4367     ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4368     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4369   }
4370 
4371   ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4372   ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4373   ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4374   ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr);
4375   ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4376   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4377   PetscFunctionReturn(0);
4378 }
4379 
4380 #undef __FUNCT__
4381 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJNumeric"
4382 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJNumeric(MPI_Comm comm,Mat inmat,PetscInt n,Mat outmat)
4383 {
4384   PetscErrorCode ierr;
4385   PetscInt       m,N,i,rstart,nnz,Ii;
4386   PetscInt       *indx;
4387   PetscScalar    *values;
4388 
4389   PetscFunctionBegin;
4390   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4391   ierr = MatGetOwnershipRange(outmat,&rstart,NULL);CHKERRQ(ierr);
4392   for (i=0; i<m; i++) {
4393     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4394     Ii   = i + rstart;
4395     ierr = MatSetValues(outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4396     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4397   }
4398   ierr = MatAssemblyBegin(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4399   ierr = MatAssemblyEnd(outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4400   PetscFunctionReturn(0);
4401 }
4402 
4403 #undef __FUNCT__
4404 #define __FUNCT__ "MatCreateMPIAIJConcatenateSeqAIJ"
4405 /*@
4406       MatCreateMPIAIJConcatenateSeqAIJ - Creates a single large PETSc matrix by concatenating sequential
4407                  matrices from each processor
4408 
4409     Collective on MPI_Comm
4410 
4411    Input Parameters:
4412 +    comm - the communicator the parallel matrix will live on
4413 .    inmat - the input sequential matrix (one per process)
4414 .    n - number of local columns (or PETSC_DECIDE)
4415 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4416 
4417    Output Parameter:
4418 .    outmat - the parallel matrix generated
4419 
4420     Level: advanced
4421 
4422    Notes: The number of columns of the matrix in EACH processor MUST be the same.
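
   A minimal sketch of a typical call (seqA is assumed to be a SeqAIJ matrix
   already assembled on PETSC_COMM_SELF by each process):
.vb
      Mat C;
      ierr = MatCreateMPIAIJConcatenateSeqAIJ(PETSC_COMM_WORLD,seqA,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve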
4423 
4424 @*/
4425 PetscErrorCode  MatCreateMPIAIJConcatenateSeqAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4426 {
4427   PetscErrorCode ierr;
4428   PetscMPIInt    size;
4429 
4430   PetscFunctionBegin;
4431   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4432   ierr = PetscLogEventBegin(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4433   if (size == 1) {
4434     if (scall == MAT_INITIAL_MATRIX) {
4435       ierr = MatDuplicate(inmat,MAT_COPY_VALUES,outmat);CHKERRQ(ierr);
4436     } else {
4437       ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4438     }
4439   } else {
4440     if (scall == MAT_INITIAL_MATRIX) {
4441       ierr = MatCreateMPIAIJConcatenateSeqAIJSymbolic(comm,inmat,n,outmat);CHKERRQ(ierr);
4442     }
4443     ierr = MatCreateMPIAIJConcatenateSeqAIJNumeric(comm,inmat,n,*outmat);CHKERRQ(ierr);
4444   }
4445   ierr = PetscLogEventEnd(MAT_Merge,inmat,0,0,0);CHKERRQ(ierr);
4446   PetscFunctionReturn(0);
4447 }
4448 
4449 #undef __FUNCT__
4450 #define __FUNCT__ "MatFileSplit"
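/* Writes this process's local rows of A, as an m x N sequential matrix, to its own binary file "<outfile>.<rank>" (opened in append mode) */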
4451 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4452 {
4453   PetscErrorCode    ierr;
4454   PetscMPIInt       rank;
4455   PetscInt          m,N,i,rstart,nnz;
4456   size_t            len;
4457   const PetscInt    *indx;
4458   PetscViewer       out;
4459   char              *name;
4460   Mat               B;
4461   const PetscScalar *values;
4462 
4463   PetscFunctionBegin;
4464   ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr);
4465   ierr = MatGetSize(A,0,&N);CHKERRQ(ierr);
4466   /* Should this be the type of the diagonal block of A? */
4467   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4468   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4469   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4470   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4471   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4472   ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr);
4473   for (i=0; i<m; i++) {
4474     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4475     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4476     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4477   }
4478   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4479   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4480 
4481   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
4482   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4483   ierr = PetscMalloc1((len+16),&name);CHKERRQ(ierr); /* room for the ".<rank>" suffix even on large communicators */
4484   ierr = PetscSNPrintf(name,len+16,"%s.%d",outfile,rank);CHKERRQ(ierr);
4485   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4486   ierr = PetscFree(name);CHKERRQ(ierr);
4487   ierr = MatView(B,out);CHKERRQ(ierr);
4488   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4489   ierr = MatDestroy(&B);CHKERRQ(ierr);
4490   PetscFunctionReturn(0);
4491 }
4492 
4493 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
4494 #undef __FUNCT__
4495 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI"
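/* Frees the Mat_Merge_SeqsToMPI support structure composed on A by MatCreateMPIAIJSumSeqAIJSymbolic(), then performs the standard MPIAIJ destroy */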
4496 PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat A)
4497 {
4498   PetscErrorCode      ierr;
4499   Mat_Merge_SeqsToMPI *merge;
4500   PetscContainer      container;
4501 
4502   PetscFunctionBegin;
4503   ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4504   if (container) {
4505     ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4506     ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4507     ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4508     ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4509     ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4510     ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4511     ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4512     ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4513     ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4514     ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4515     ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4516     ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4517     ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4518     ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4519     ierr = PetscFree(merge);CHKERRQ(ierr);
4520     ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr);
4521   }
4522   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
4523   PetscFunctionReturn(0);
4524 }
4525 
4526 #include <../src/mat/utils/freespace.h>
4527 #include <petscbt.h>
4528 
4529 #undef __FUNCT__
4530 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric"
4531 PetscErrorCode  MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4532 {
4533   PetscErrorCode      ierr;
4534   MPI_Comm            comm;
4535   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4536   PetscMPIInt         size,rank,taga,*len_s;
4537   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4538   PetscInt            proc,m;
4539   PetscInt            **buf_ri,**buf_rj;
4540   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4541   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4542   MPI_Request         *s_waits,*r_waits;
4543   MPI_Status          *status;
4544   MatScalar           *aa=a->a;
4545   MatScalar           **abuf_r,*ba_i;
4546   Mat_Merge_SeqsToMPI *merge;
4547   PetscContainer      container;
4548 
4549   PetscFunctionBegin;
4550   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4551   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4552 
4553   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4554   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4555 
4556   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4557   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4558 
4559   bi     = merge->bi;
4560   bj     = merge->bj;
4561   buf_ri = merge->buf_ri;
4562   buf_rj = merge->buf_rj;
4563 
4564   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4565   owners = merge->rowmap->range;
4566   len_s  = merge->len_s;
4567 
4568   /* send and recv matrix values */
4569   /*-----------------------------*/
4570   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4571   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4572 
4573   ierr = PetscMalloc1((merge->nsend+1),&s_waits);CHKERRQ(ierr);
4574   for (proc=0,k=0; proc<size; proc++) {
4575     if (!len_s[proc]) continue;
4576     i    = owners[proc];
4577     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
4578     k++;
4579   }
4580 
4581   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
4582   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
4583   ierr = PetscFree(status);CHKERRQ(ierr);
4584 
4585   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4586   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4587 
4588   /* insert mat values of mpimat */
4589   /*----------------------------*/
4590   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4591   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4592 
4593   for (k=0; k<merge->nrecv; k++) {
4594     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4595     nrows       = *(buf_ri_k[k]);
4596     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th received i-structure */
4597     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4598   }
4599 
4600   /* set values of ba */
4601   m = merge->rowmap->n;
4602   for (i=0; i<m; i++) {
4603     arow = owners[rank] + i;
4604     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4605     bnzi = bi[i+1] - bi[i];
4606     ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);
4607 
4608     /* add local non-zero vals of this proc's seqmat into ba */
4609     anzi   = ai[arow+1] - ai[arow];
4610     aj     = a->j + ai[arow];
4611     aa     = a->a + ai[arow];
4612     nextaj = 0;
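    /* both bj_i and aj are sorted, so a single forward pass over bj_i matches every entry of aj */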
4613     for (j=0; nextaj<anzi; j++) {
4614       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4615         ba_i[j] += aa[nextaj++];
4616       }
4617     }
4618 
4619     /* add received vals into ba */
4620     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4621       /* i-th row */
4622       if (i == *nextrow[k]) {
4623         anzi   = *(nextai[k]+1) - *nextai[k];
4624         aj     = buf_rj[k] + *(nextai[k]);
4625         aa     = abuf_r[k] + *(nextai[k]);
4626         nextaj = 0;
4627         for (j=0; nextaj<anzi; j++) {
4628           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4629             ba_i[j] += aa[nextaj++];
4630           }
4631         }
4632         nextrow[k]++; nextai[k]++;
4633       }
4634     }
4635     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4636   }
4637   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4638   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4639 
4640   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4641   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4642   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4643   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4644   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4645   PetscFunctionReturn(0);
4646 }
4647 
4648 extern PetscErrorCode  MatDestroy_MPIAIJ_SeqsToMPI(Mat);
4649 
4650 #undef __FUNCT__
4651 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic"
4652 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4653 {
4654   PetscErrorCode      ierr;
4655   Mat                 B_mpi;
4656   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4657   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4658   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4659   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4660   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4661   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4662   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4663   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4664   MPI_Status          *status;
4665   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4666   PetscBT             lnkbt;
4667   Mat_Merge_SeqsToMPI *merge;
4668   PetscContainer      container;
4669 
4670   PetscFunctionBegin;
4671   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4672 
4673   /* make sure it is a PETSc comm */
4674   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4675   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4676   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
4677 
4678   ierr = PetscNew(&merge);CHKERRQ(ierr);
4679   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4680 
4681   /* determine row ownership */
4682   /*---------------------------------------------------------*/
4683   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4684   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4685   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4686   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4687   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4688   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4689   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4690 
4691   m      = merge->rowmap->n;
4692   owners = merge->rowmap->range;
4693 
4694   /* determine the number of messages to send, their lengths */
4695   /*---------------------------------------------------------*/
4696   len_s = merge->len_s;
4697 
4698   len          = 0; /* length of buf_si[] */
4699   merge->nsend = 0;
4700   for (proc=0; proc<size; proc++) {
4701     len_si[proc] = 0;
4702     if (proc == rank) {
4703       len_s[proc] = 0;
4704     } else {
4705       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4706       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4707     }
4708     if (len_s[proc]) {
4709       merge->nsend++;
4710       nrows = 0;
4711       for (i=owners[proc]; i<owners[proc+1]; i++) {
4712         if (ai[i+1] > ai[i]) nrows++;
4713       }
4714       len_si[proc] = 2*(nrows+1);
4715       len         += len_si[proc];
4716     }
4717   }
4718 
4719   /* determine the number and length of messages to receive for ij-structure */
4720   /*-------------------------------------------------------------------------*/
4721   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4722   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4723 
4724   /* post the Irecv of j-structure */
4725   /*-------------------------------*/
4726   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4727   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4728 
4729   /* post the Isend of j-structure */
4730   /*--------------------------------*/
4731   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4732 
4733   for (proc=0, k=0; proc<size; proc++) {
4734     if (!len_s[proc]) continue;
4735     i    = owners[proc];
4736     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr);
4737     k++;
4738   }
4739 
4740   /* receives and sends of j-structure are complete */
4741   /*------------------------------------------------*/
4742   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);}
4743   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);}
4744 
4745   /* send and recv i-structure */
4746   /*---------------------------*/
4747   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4748   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4749 
4750   ierr   = PetscMalloc1((len+1),&buf_s);CHKERRQ(ierr);
4751   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4752   for (proc=0,k=0; proc<size; proc++) {
4753     if (!len_s[proc]) continue;
4754     /* form outgoing message for i-structure:
4755          buf_si[0]:                 nrows to be sent
4756                [1:nrows]:           row index (global)
4757                [nrows+1:2*nrows+1]: i-structure index
4758     */
4759     /*-------------------------------------------*/
4760     nrows       = len_si[proc]/2 - 1;
4761     buf_si_i    = buf_si + nrows+1;
4762     buf_si[0]   = nrows;
4763     buf_si_i[0] = 0;
4764     nrows       = 0;
4765     for (i=owners[proc]; i<owners[proc+1]; i++) {
4766       anzi = ai[i+1] - ai[i];
4767       if (anzi) {
4768         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4769         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4770         nrows++;
4771       }
4772     }
4773     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr);
4774     k++;
4775     buf_si += len_si[proc];
4776   }
4777 
4778   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);}
4779   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);}
4780 
4781   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4782   for (i=0; i<merge->nrecv; i++) {
4783     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4784   }
4785 
4786   ierr = PetscFree(len_si);CHKERRQ(ierr);
4787   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4788   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4789   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4790   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4791   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4792   ierr = PetscFree(status);CHKERRQ(ierr);
4793 
4794   /* compute a local seq matrix in each processor */
4795   /*----------------------------------------------*/
4796   /* allocate bi array and free space for accumulating nonzero column info */
4797   ierr  = PetscMalloc1((m+1),&bi);CHKERRQ(ierr);
4798   bi[0] = 0;
4799 
4800   /* create and initialize a linked list */
4801   nlnk = N+1;
4802   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4803 
4804   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4805   len  = ai[owners[rank+1]] - ai[owners[rank]];
4806   ierr = PetscFreeSpaceGet((PetscInt)(2*len+1),&free_space);CHKERRQ(ierr);
4807 
4808   current_space = free_space;
4809 
4810   /* determine symbolic info for each local row */
4811   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4812 
4813   for (k=0; k<merge->nrecv; k++) {
4814     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
4815     nrows       = *buf_ri_k[k];
4816     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
4817     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
4818   }
4819 
4820   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4821   len  = 0;
4822   for (i=0; i<m; i++) {
4823     bnzi = 0;
4824     /* add local non-zero cols of this proc's seqmat into lnk */
4825     arow  = owners[rank] + i;
4826     anzi  = ai[arow+1] - ai[arow];
4827     aj    = a->j + ai[arow];
4828     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4829     bnzi += nlnk;
4830     /* add received col data into lnk */
4831     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4832       if (i == *nextrow[k]) { /* i-th row */
4833         anzi  = *(nextai[k]+1) - *nextai[k];
4834         aj    = buf_rj[k] + *nextai[k];
4835         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4836         bnzi += nlnk;
4837         nextrow[k]++; nextai[k]++;
4838       }
4839     }
4840     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4841 
4842     /* if free space is not available, make more free space */
4843     if (current_space->local_remaining<bnzi) {
4844       ierr = PetscFreeSpaceGet(bnzi+current_space->total_array_size,&current_space);CHKERRQ(ierr);
4845       nspacedouble++;
4846     }
4847     /* copy data into free space, then initialize lnk */
4848     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4849     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4850 
4851     current_space->array           += bnzi;
4852     current_space->local_used      += bnzi;
4853     current_space->local_remaining -= bnzi;
4854 
4855     bi[i+1] = bi[i] + bnzi;
4856   }
4857 
4858   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4859 
4860   ierr = PetscMalloc1((bi[m]+1),&bj);CHKERRQ(ierr);
4861   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4862   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4863 
4864   /* create symbolic parallel matrix B_mpi */
4865   /*---------------------------------------*/
4866   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4867   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4868   if (n==PETSC_DECIDE) {
4869     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4870   } else {
4871     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4872   }
4873   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4874   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4875   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4876   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4877   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4878 
4879   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4880   B_mpi->assembled    = PETSC_FALSE;
4881   B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
4882   merge->bi           = bi;
4883   merge->bj           = bj;
4884   merge->buf_ri       = buf_ri;
4885   merge->buf_rj       = buf_rj;
4886   merge->coi          = NULL;
4887   merge->coj          = NULL;
4888   merge->owners_co    = NULL;
4889 
4890   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4891 
4892   /* attach the supporting struct to B_mpi for reuse */
4893   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4894   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4895   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4896   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4897   *mpimat = B_mpi;
4898 
4899   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4900   PetscFunctionReturn(0);
4901 }
4902 
4903 #undef __FUNCT__
4904 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ"
4905 /*@C
4906       MatCreateMPIAIJSumSeqAIJ - Creates an MPIAIJ matrix by adding sequential
4907                  matrices from each processor
4908 
4909     Collective on MPI_Comm
4910 
4911    Input Parameters:
4912 +    comm - the communicator the parallel matrix will live on
4913 .    seqmat - the input sequential matrix (one per process)
4914 .    m - number of local rows (or PETSC_DECIDE)
4915 .    n - number of local columns (or PETSC_DECIDE)
4916 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4917 
4918    Output Parameter:
4919 .    mpimat - the parallel matrix generated
4920 
4921     Level: advanced
4922 
4923    Notes:
4924      The dimensions of the sequential matrix in each processor MUST be the same.
4925      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
4926      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
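
   A minimal sketch of a typical call (seqmat is assumed to be an M x N SeqAIJ
   matrix holding this process's contribution; the contributions are summed
   entrywise into the parallel result):
.vb
      Mat C;
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
.ve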
4927 @*/
4928 PetscErrorCode  MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4929 {
4930   PetscErrorCode ierr;
4931   PetscMPIInt    size;
4932 
4933   PetscFunctionBegin;
4934   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
4935   if (size == 1) {
4936     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4937     if (scall == MAT_INITIAL_MATRIX) {
4938       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4939     } else {
4940       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4941     }
4942     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4943     PetscFunctionReturn(0);
4944   }
4945   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4946   if (scall == MAT_INITIAL_MATRIX) {
4947     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
4948   }
4949   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
4950   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4951   PetscFunctionReturn(0);
4952 }
4953 
4954 #undef __FUNCT__
4955 #define __FUNCT__ "MatMPIAIJGetLocalMat"
4956 /*@
4957      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
4958           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
4959           with MatGetSize()
4960 
4961     Not Collective
4962 
4963    Input Parameters:
4964 +    A - the matrix
4965 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4966 
4967    Output Parameter:
4968 .    A_loc - the local sequential matrix generated
4969 
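   A minimal sketch of a typical call (A is assumed to be an assembled MPIAIJ matrix):
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
.ve
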
4970     Level: developer
4971 
4972 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()
4973 
4974 @*/
4975 PetscErrorCode  MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
4976 {
4977   PetscErrorCode ierr;
4978   Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
4979   Mat_SeqAIJ     *mat,*a,*b;
4980   PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
4981   MatScalar      *aa,*ba,*cam;
4982   PetscScalar    *ca;
4983   PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
4984   PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
4985   PetscBool      match;
4986 
4987   PetscFunctionBegin;
4988   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
4989   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
4990   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
4991   a = (Mat_SeqAIJ*)(mpimat->A)->data;
4992   b = (Mat_SeqAIJ*)(mpimat->B)->data;
4993   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
4994   aa = a->a; ba = b->a;
4995   if (scall == MAT_INITIAL_MATRIX) {
4996     ierr  = PetscMalloc1((1+am),&ci);CHKERRQ(ierr);
4997     ci[0] = 0;
4998     for (i=0; i<am; i++) {
4999       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5000     }
5001     ierr = PetscMalloc1((1+ci[am]),&cj);CHKERRQ(ierr);
5002     ierr = PetscMalloc1((1+ci[am]),&ca);CHKERRQ(ierr);
5003     k    = 0;
5004     for (i=0; i<am; i++) {
5005       ncols_o = bi[i+1] - bi[i];
5006       ncols_d = ai[i+1] - ai[i];
5007       /* off-diagonal portion of A */
5008       for (jo=0; jo<ncols_o; jo++) {
5009         col = cmap[*bj];
5010         if (col >= cstart) break;
5011         cj[k]   = col; bj++;
5012         ca[k++] = *ba++;
5013       }
5014       /* diagonal portion of A */
5015       for (j=0; j<ncols_d; j++) {
5016         cj[k]   = cstart + *aj++;
5017         ca[k++] = *aa++;
5018       }
5019       /* off-diagonal portion of A */
5020       for (j=jo; j<ncols_o; j++) {
5021         cj[k]   = cmap[*bj++];
5022         ca[k++] = *ba++;
5023       }
5024     }
5025     /* put together the new matrix */
5026     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5027     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5028     /* Since these are PETSc arrays, change flags to free them as necessary. */
5029     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5030     mat->free_a  = PETSC_TRUE;
5031     mat->free_ij = PETSC_TRUE;
5032     mat->nonew   = 0;
5033   } else if (scall == MAT_REUSE_MATRIX) {
5034     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5035     ci = mat->i; cj = mat->j; cam = mat->a;
5036     for (i=0; i<am; i++) {
5037       /* off-diagonal portion of A */
5038       ncols_o = bi[i+1] - bi[i];
5039       for (jo=0; jo<ncols_o; jo++) {
5040         col = cmap[*bj];
5041         if (col >= cstart) break;
5042         *cam++ = *ba++; bj++;
5043       }
5044       /* diagonal portion of A */
5045       ncols_d = ai[i+1] - ai[i];
5046       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5047       /* off-diagonal portion of A */
5048       for (j=jo; j<ncols_o; j++) {
5049         *cam++ = *ba++; bj++;
5050       }
5051     }
5052   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5053   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5054   PetscFunctionReturn(0);
5055 }
5056 
5057 #undef __FUNCT__
5058 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed"
5059 /*@C
5060      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MPIAIJ matrix by taking all its local rows and NON-ZERO columns
5061 
5062     Not Collective
5063 
5064    Input Parameters:
5065 +    A - the matrix
5066 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5067 -    row, col - index sets of rows and columns to extract (or NULL)
5068 
5069    Output Parameter:
5070 .    A_loc - the local sequential matrix generated
5071 
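   A minimal sketch of a typical call, taking all local rows and the nonzero
   columns (A is assumed to be an assembled MPIAIJ matrix):
.vb
      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
.ve
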
5072     Level: developer
5073 
5074 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5075 
5076 @*/
5077 PetscErrorCode  MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5078 {
5079   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5080   PetscErrorCode ierr;
5081   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5082   IS             isrowa,iscola;
5083   Mat            *aloc;
5084   PetscBool      match;
5085 
5086   PetscFunctionBegin;
5087   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5088   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MPIAIJ matrix as input");
5089   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5090   if (!row) {
5091     start = A->rmap->rstart; end = A->rmap->rend;
5092     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5093   } else {
5094     isrowa = *row;
5095   }
5096   if (!col) {
5097     start = A->cmap->rstart;
5098     cmap  = a->garray;
5099     nzA   = a->A->cmap->n;
5100     nzB   = a->B->cmap->n;
5101     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5102     ncols = 0;
5103     for (i=0; i<nzB; i++) {
5104       if (cmap[i] < start) idx[ncols++] = cmap[i];
5105       else break;
5106     }
5107     imark = i;
5108     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5109     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5110     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5111   } else {
5112     iscola = *col;
5113   }
5114   if (scall != MAT_INITIAL_MATRIX) {
5115     ierr    = PetscMalloc(sizeof(Mat),&aloc);CHKERRQ(ierr);
5116     aloc[0] = *A_loc;
5117   }
5118   ierr   = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5119   *A_loc = aloc[0];
5120   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5121   if (!row) {
5122     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5123   }
5124   if (!col) {
5125     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5126   }
5127   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5128   PetscFunctionReturn(0);
5129 }
5130 
5131 #undef __FUNCT__
5132 #define __FUNCT__ "MatGetBrowsOfAcols"
5133 /*@C
5134     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5135 
5136     Collective on Mat
5137 
5138    Input Parameters:
5139 +    A,B - the matrices in mpiaij format
5140 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5141 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5142 
5143    Output Parameter:
5144 +    rowb, colb - index sets of the rows and columns of B that were extracted (created when scall is MAT_INITIAL_MATRIX)
5145 -    B_seq - the sequential matrix generated
5146 
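   Example (a sketch of the create-then-reuse pattern; variable names are illustrative):
.vb
  IS  rowb = NULL,colb = NULL;
  Mat Bseq = NULL;
  ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
  /* ... after the numerical values of B change (same nonzero pattern) ... */
  ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
  ierr = ISDestroy(&rowb);CHKERRQ(ierr);
  ierr = ISDestroy(&colb);CHKERRQ(ierr);
  ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
.ve
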
5147     Level: developer
5148 
5149 @*/
5150 PetscErrorCode  MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5151 {
5152   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5153   PetscErrorCode ierr;
5154   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5155   IS             isrowb,iscolb;
5156   Mat            *bseq=NULL;
5157 
5158   PetscFunctionBegin;
5159   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5160     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5161   }
5162   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5163 
5164   if (scall == MAT_INITIAL_MATRIX) {
5165     start = A->cmap->rstart;
5166     cmap  = a->garray;
5167     nzA   = a->A->cmap->n;
5168     nzB   = a->B->cmap->n;
5169     ierr  = PetscMalloc1((nzA+nzB), &idx);CHKERRQ(ierr);
5170     ncols = 0;
5171     for (i=0; i<nzB; i++) {  /* global rows of B that precede the local diagonal block */
5172       if (cmap[i] < start) idx[ncols++] = cmap[i];
5173       else break;
5174     }
5175     imark = i;
5176     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5177     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* global rows of B that follow the local diagonal block */
5178     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5179     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5180   } else {
5181     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5182     isrowb  = *rowb; iscolb = *colb;
5183     ierr    = PetscMalloc(sizeof(Mat),&bseq);CHKERRQ(ierr);
5184     bseq[0] = *B_seq;
5185   }
5186   ierr   = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5187   *B_seq = bseq[0];
5188   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5189   if (!rowb) {
5190     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5191   } else {
5192     *rowb = isrowb;
5193   }
5194   if (!colb) {
5195     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5196   } else {
5197     *colb = iscolb;
5198   }
5199   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5200   PetscFunctionReturn(0);
5201 }
5202 
5203 #undef __FUNCT__
5204 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ"
5205 /*
5206     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5207     of the OFF-DIAGONAL portion of the local A
5208 
5209     Collective on Mat
5210 
5211    Input Parameters:
5212 +    A,B - the matrices in mpiaij format
5213 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5214 
5215    Output Parameter:
5216 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5217 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5218 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5219 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5220 
5221     Level: developer
5222 
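   Calling pattern (a sketch only; this is an internal developer routine, and the buffers
   returned for MAT_INITIAL_MATRIX must be passed back unchanged for reuse):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth = NULL;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     /* ... the values of B change, its nonzero structure does not ... */
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
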
5223 */
5224 PetscErrorCode  MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5225 {
5226   VecScatter_MPI_General *gen_to,*gen_from;
5227   PetscErrorCode         ierr;
5228   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5229   Mat_SeqAIJ             *b_oth;
5230   VecScatter             ctx =a->Mvctx;
5231   MPI_Comm               comm;
5232   PetscMPIInt            *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank;
5233   PetscInt               *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj;
5234   PetscScalar            *rvalues,*svalues;
5235   MatScalar              *b_otha,*bufa,*bufA;
5236   PetscInt               i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len;
5237   MPI_Request            *rwaits = NULL,*swaits = NULL;
5238   MPI_Status             *sstatus,rstatus;
5239   PetscMPIInt            jj;
5240   PetscInt               *cols,sbs,rbs;
5241   PetscScalar            *vals;
5242 
5243   PetscFunctionBegin;
5244   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5245   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5246     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5247   }
5248   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5249   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
5250 
5251   gen_to   = (VecScatter_MPI_General*)ctx->todata;
5252   gen_from = (VecScatter_MPI_General*)ctx->fromdata;
5253   rvalues  = gen_from->values; /* holds the lengths of the rows to be received */
5254   svalues  = gen_to->values;   /* holds the lengths of the rows to be sent */
5255   nrecvs   = gen_from->n;
5256   nsends   = gen_to->n;
5257 
5258   ierr    = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5259   srow    = gen_to->indices;    /* local row index to be sent */
5260   sstarts = gen_to->starts;
5261   sprocs  = gen_to->procs;
5262   sstatus = gen_to->sstatus;
5263   sbs     = gen_to->bs;
5264   rstarts = gen_from->starts;
5265   rprocs  = gen_from->procs;
5266   rbs     = gen_from->bs;
5267 
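  /* without the saved communication maps and send buffer, reuse is impossible; rebuild everything */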
5268   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5269   if (scall == MAT_INITIAL_MATRIX) {
5270     /* i-array */
5271     /*---------*/
5272     /*  post receives */
5273     for (i=0; i<nrecvs; i++) {
5274       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5275       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5276       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5277     }
5278 
5279     /* pack the outgoing message */
5280     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5281 
5282     sstartsj[0] = 0;
5283     rstartsj[0] = 0;
5284     len         = 0; /* total length of j or a array to be sent */
5285     k           = 0;
5286     for (i=0; i<nsends; i++) {
5287       rowlen = (PetscInt*)svalues + sstarts[i]*sbs;
5288       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5289       for (j=0; j<nrows; j++) {
5290         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5291         for (l=0; l<sbs; l++) {
5292           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5293 
5294           rowlen[j*sbs+l] = ncols;
5295 
5296           len += ncols;
5297           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5298         }
5299         k++;
5300       }
5301       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5302 
5303       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5304     }
5305     /* wait until the recvs and sends of the i-array have completed */
5306     i = nrecvs;
5307     while (i--) {
5308       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5309     }
5310     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5311 
5312     /* allocate buffers for sending j and a arrays */
5313     ierr = PetscMalloc1((len+1),&bufj);CHKERRQ(ierr);
5314     ierr = PetscMalloc1((len+1),&bufa);CHKERRQ(ierr);
5315 
5316     /* create i-array of B_oth */
5317     ierr = PetscMalloc1((aBn+2),&b_othi);CHKERRQ(ierr);
5318 
5319     b_othi[0] = 0;
5320     len       = 0; /* total length of j or a array to be received */
5321     k         = 0;
5322     for (i=0; i<nrecvs; i++) {
5323       rowlen = (PetscInt*)rvalues + rstarts[i]*rbs;
5324       nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
5325       for (j=0; j<nrows; j++) {
5326         b_othi[k+1] = b_othi[k] + rowlen[j];
5327         len        += rowlen[j]; k++;
5328       }
5329       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5330     }
5331 
5332     /* allocate space for j and a arrays of B_oth */
5333     ierr = PetscMalloc1((b_othi[aBn]+1),&b_othj);CHKERRQ(ierr);
5334     ierr = PetscMalloc1((b_othi[aBn]+1),&b_otha);CHKERRQ(ierr);
5335 
5336     /* j-array */
5337     /*---------*/
5338     /*  post receives of j-array */
5339     for (i=0; i<nrecvs; i++) {
5340       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5341       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5342     }
5343 
5344     /* pack the outgoing message j-array */
5345     k = 0;
5346     for (i=0; i<nsends; i++) {
5347       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5348       bufJ  = bufj+sstartsj[i];
5349       for (j=0; j<nrows; j++) {
5350         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5351         for (ll=0; ll<sbs; ll++) {
5352           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5353           for (l=0; l<ncols; l++) {
5354             *bufJ++ = cols[l];
5355           }
5356           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5357         }
5358       }
5359       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5360     }
5361 
5362     /* wait until the recvs and sends of the j-array have completed */
5363     i = nrecvs;
5364     while (i--) {
5365       ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5366     }
5367     if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5368   } else if (scall == MAT_REUSE_MATRIX) {
5369     sstartsj = *startsj_s;
5370     rstartsj = *startsj_r;
5371     bufa     = *bufa_ptr;
5372     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5373     b_otha   = b_oth->a;
5374   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5375 
5376   /* a-array */
5377   /*---------*/
5378   /*  post receives of a-array */
5379   for (i=0; i<nrecvs; i++) {
5380     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5381     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5382   }
5383 
5384   /* pack the outgoing message a-array */
5385   k = 0;
5386   for (i=0; i<nsends; i++) {
5387     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5388     bufA  = bufa+sstartsj[i];
5389     for (j=0; j<nrows; j++) {
5390       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5391       for (ll=0; ll<sbs; ll++) {
5392         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5393         for (l=0; l<ncols; l++) {
5394           *bufA++ = vals[l];
5395         }
5396         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5397       }
5398     }
5399     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
5400   }
5401   /* wait until the recvs and sends of the a-array have completed */
5402   i = nrecvs;
5403   while (i--) {
5404     ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
5405   }
5406   if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
5407   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5408 
5409   if (scall == MAT_INITIAL_MATRIX) {
5410     /* put together the new matrix */
5411     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5412 
5413     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5414     /* Since these are PETSc arrays, change flags to free them as necessary. */
5415     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5416     b_oth->free_a  = PETSC_TRUE;
5417     b_oth->free_ij = PETSC_TRUE;
5418     b_oth->nonew   = 0;
5419 
5420     ierr = PetscFree(bufj);CHKERRQ(ierr);
5421     if (!startsj_s || !bufa_ptr) {
5422       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5423       ierr = PetscFree(bufa);CHKERRQ(ierr); /* free the send buffer itself; it is not returned to the caller */
5424     } else {
5425       *startsj_s = sstartsj;
5426       *startsj_r = rstartsj;
5427       *bufa_ptr  = bufa;
5428     }
5429   }
5430   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5431   PetscFunctionReturn(0);
5432 }
5433 
5434 #undef __FUNCT__
5435 #define __FUNCT__ "MatGetCommunicationStructs"
5436 /*@C
5437   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5438 
5439   Not Collective
5440 
5441   Input Parameters:
5442 . A - The matrix in mpiaij format
5443 
5444   Output Parameter:
5445 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5446 . colmap - A map from global column index to local index into lvec
5447 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5448 
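  Example (a minimal sketch; the type of colmap depends on the PETSC_USE_CTABLE configuration,
  and the returned objects are internal to A and must not be destroyed by the caller):
.vb
  Vec        lvec;
  VecScatter scat;
#if defined(PETSC_USE_CTABLE)
  PetscTable colmap;
#else
  PetscInt   *colmap;
#endif
  ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&scat);CHKERRQ(ierr);
.ve
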
5449   Level: developer
5450 
5451 @*/
5452 #if defined(PETSC_USE_CTABLE)
5453 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5454 #else
5455 PetscErrorCode  MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5456 #endif
5457 {
5458   Mat_MPIAIJ *a;
5459 
5460   PetscFunctionBegin;
5461   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5462   PetscValidPointer(lvec, 2);
5463   PetscValidPointer(colmap, 3);
5464   PetscValidPointer(multScatter, 4);
5465   a = (Mat_MPIAIJ*) A->data;
5466   if (lvec) *lvec = a->lvec;
5467   if (colmap) *colmap = a->colmap;
5468   if (multScatter) *multScatter = a->Mvctx;
5469   PetscFunctionReturn(0);
5470 }
5471 
5472 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5473 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5474 PETSC_EXTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5475 
5476 #undef __FUNCT__
5477 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ"
5478 /*
5479     Computes C = A*B as (B'*A')', since computing A*B directly is untenable
5480 
5481                n                       p                          p
5482         (              )       (              )         (                  )
5483       m (      A       )  *  n (       B      )   =   m (         C        )
5484         (              )       (              )         (                  )
5485 
5486 */
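/* This numeric routine is reached through the generic MatMatMult() when A is MATMPIDENSE and
   B is MATMPIAIJ; it is composed onto the matrix in MatCreate_MPIAIJ() below. */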
5487 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5488 {
5489   PetscErrorCode ierr;
5490   Mat            At,Bt,Ct;
5491 
5492   PetscFunctionBegin;
5493   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5494   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5495   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
5496   ierr = MatDestroy(&At);CHKERRQ(ierr);
5497   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5498   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5499   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5500   PetscFunctionReturn(0);
5501 }
5502 
5503 #undef __FUNCT__
5504 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ"
5505 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
5506 {
5507   PetscErrorCode ierr;
5508   PetscInt       m=A->rmap->n,n=B->cmap->n;
5509   Mat            Cmat;
5510 
5511   PetscFunctionBegin;
5512   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5513   ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
5514   ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5515   ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
5516   ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
5517   ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
5518   ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5519   ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5520 
5521   Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5522 
5523   *C = Cmat;
5524   PetscFunctionReturn(0);
5525 }
5526 
5527 /* ----------------------------------------------------------------*/
5528 #undef __FUNCT__
5529 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ"
5530 PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
5531 {
5532   PetscErrorCode ierr;
5533 
5534   PetscFunctionBegin;
5535   if (scall == MAT_INITIAL_MATRIX) {
5536     ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5537     ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
5538     ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
5539   }
5540   ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5541   ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
5542   ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
5543   PetscFunctionReturn(0);
5544 }
5545 
5546 #if defined(PETSC_HAVE_MUMPS)
5547 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_mumps(Mat,MatFactorType,Mat*);
5548 #endif
5549 #if defined(PETSC_HAVE_PASTIX)
5550 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_pastix(Mat,MatFactorType,Mat*);
5551 #endif
5552 #if defined(PETSC_HAVE_SUPERLU_DIST)
5553 PETSC_EXTERN PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat,MatFactorType,Mat*);
5554 #endif
5555 #if defined(PETSC_HAVE_CLIQUE)
5556 PETSC_EXTERN PetscErrorCode MatGetFactor_aij_clique(Mat,MatFactorType,Mat*);
5557 #endif
5558 
5559 /*MC
5560    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
5561 
5562    Options Database Keys:
5563 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
5564 
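  Example (a sketch of typical creation; dnz and onz are illustrative per-row preallocation estimates):
.vb
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,dnz,NULL,onz,NULL);CHKERRQ(ierr);
.ve
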
5565   Level: beginner
5566 
5567 .seealso: MatCreateAIJ()
5568 M*/
5569 
5570 #undef __FUNCT__
5571 #define __FUNCT__ "MatCreate_MPIAIJ"
5572 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
5573 {
5574   Mat_MPIAIJ     *b;
5575   PetscErrorCode ierr;
5576   PetscMPIInt    size;
5577 
5578   PetscFunctionBegin;
5579   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
5580 
5581   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
5582   B->data       = (void*)b;
5583   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
5584   B->assembled  = PETSC_FALSE;
5585   B->insertmode = NOT_SET_VALUES;
5586   b->size       = size;
5587 
5588   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
5589 
5590   /* build cache for off array entries formed */
5591   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
5592 
5593   b->donotstash  = PETSC_FALSE;
5594   b->colmap      = 0;
5595   b->garray      = 0;
5596   b->roworiented = PETSC_TRUE;
5597 
5598   /* stuff used for matrix vector multiply */
5599   b->lvec  = NULL;
5600   b->Mvctx = NULL;
5601 
5602   /* stuff for MatGetRow() */
5603   b->rowindices   = 0;
5604   b->rowvalues    = 0;
5605   b->getrowactive = PETSC_FALSE;
5606 
5607   /* flexible pointer used in CUSP/CUSPARSE classes */
5608   b->spptr = NULL;
5609 
5610 #if defined(PETSC_HAVE_MUMPS)
5611   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_aij_mumps);CHKERRQ(ierr);
5612 #endif
5613 #if defined(PETSC_HAVE_PASTIX)
5614   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpiaij_pastix);CHKERRQ(ierr);
5615 #endif
5616 #if defined(PETSC_HAVE_SUPERLU_DIST)
5617   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_superlu_dist_C",MatGetFactor_mpiaij_superlu_dist);CHKERRQ(ierr);
5618 #endif
5619 #if defined(PETSC_HAVE_CLIQUE)
5620   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_clique_C",MatGetFactor_aij_clique);CHKERRQ(ierr);
5621 #endif
5622   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
5623   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
5624   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIAIJ);CHKERRQ(ierr);
5625   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
5626   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
5627   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
5628   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
5629   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
5630   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
5631   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
5632   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
5633   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
5634   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
5635   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
5636   PetscFunctionReturn(0);
5637 }
5638 
5639 #undef __FUNCT__
5640 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays"
5641 /*@
5642      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
5643          and "off-diagonal" part of the matrix in CSR format.
5644 
5645    Collective on MPI_Comm
5646 
5647    Input Parameters:
5648 +  comm - MPI communicator
5649 .  m - number of local rows (cannot be PETSC_DECIDE)
5650 .  n - number of local columns; this should be the same as the local size used in creating the
5651        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
5652        calculated if N is given). For square matrices n is almost always m.
5653 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
5654 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
5655 .   i - row indices for "diagonal" portion of matrix
5656 .   j - column indices
5657 .   a - matrix values
5658 .   oi - row indices for "off-diagonal" portion of matrix
5659 .   oj - column indices
5660 -   oa - matrix values
5661 
5662    Output Parameter:
5663 .   mat - the matrix
5664 
5665    Level: advanced
5666 
5667    Notes:
5668        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
5669        must free the arrays once the matrix has been destroyed and not before.
5670 
5671        The i and j indices are 0 based
5672 
5673        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
5674 
5675        This sets local rows and cannot be used to set off-processor values.
5676 
5677        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
5678        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
5679        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
5680        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
5681        keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
5682        communication if it is known that only local entries will be set.
5683 
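   Example (an illustrative sketch for rank 0 of a two-process run: each process owns 2 rows and
   2 columns of a 4x4 matrix, with one diagonal-block and one off-diagonal entry per row; the
   arrays must outlive the matrix since they are not copied, and the oj indices are global):
.vb
  PetscInt    i[]  = {0,1,2}, j[]  = {0,1}; PetscScalar a[]  = {1.0,2.0};
  PetscInt    oi[] = {0,1,2}, oj[] = {2,3}; PetscScalar oa[] = {3.0,4.0};
  Mat         A;
  ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,4,4,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve
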
5684 .keywords: matrix, aij, compressed row, sparse, parallel
5685 
5686 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
5687           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
5688 @*/
5689 PetscErrorCode  MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
5690 {
5691   PetscErrorCode ierr;
5692   Mat_MPIAIJ     *maij;
5693 
5694   PetscFunctionBegin;
5695   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
5696   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
5697   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
5698   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
5699   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
5700   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
5701   maij = (Mat_MPIAIJ*) (*mat)->data;
5702 
5703   (*mat)->preallocated = PETSC_TRUE;
5704 
5705   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
5706   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
5707 
5708   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
5709   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
5710 
5711   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5712   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5713   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5714   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5715 
5716   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5717   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5718   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
5719   PetscFunctionReturn(0);
5720 }
5721 
5722 /*
5723     Special version for direct calls from Fortran
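    (A hedged illustration: given the name mangling chosen below, a Fortran caller would
    reach this routine as, e.g., "call MatSetValuesMPIAIJ(mat,m,im,n,in,v,ADD_VALUES,ierr)".)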
5724 */
5725 #include <petsc-private/fortranimpl.h>
5726 
5727 #if defined(PETSC_HAVE_FORTRAN_CAPS)
5728 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
5729 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
5730 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
5731 #endif
5732 
5733 /* Change these macros so they can be used in a void function */
5734 #undef CHKERRQ
5735 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
5736 #undef SETERRQ2
5737 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
5738 #undef SETERRQ3
5739 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
5740 #undef SETERRQ
5741 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
5742 
5743 #undef __FUNCT__
5744 #define __FUNCT__ "matsetvaluesmpiaij_"
5745 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
5746 {
5747   Mat            mat  = *mmat;
5748   PetscInt       m    = *mm, n = *mn;
5749   InsertMode     addv = *maddv;
5750   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
5751   PetscScalar    value;
5752   PetscErrorCode ierr;
5753 
5754   MatCheckPreallocated(mat,1);
5755   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
5756 
5757 #if defined(PETSC_USE_DEBUG)
5758   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
5759 #endif
5760   {
5761     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
5762     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
5763     PetscBool roworiented = aij->roworiented;
5764 
5765     /* Some Variables required in the macro */
5766     Mat        A                 = aij->A;
5767     Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
5768     PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
5769     MatScalar  *aa               = a->a;
5770     PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
5771     Mat        B                 = aij->B;
5772     Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
5773     PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
5774     MatScalar  *ba               = b->a;
5775 
5776     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
5777     PetscInt  nonew = a->nonew;
5778     MatScalar *ap1,*ap2;
5779 
5780     PetscFunctionBegin;
5781     for (i=0; i<m; i++) {
5782       if (im[i] < 0) continue;
5783 #if defined(PETSC_USE_DEBUG)
5784       if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
5785 #endif
5786       if (im[i] >= rstart && im[i] < rend) {
5787         row      = im[i] - rstart;
5788         lastcol1 = -1;
5789         rp1      = aj + ai[row];
5790         ap1      = aa + ai[row];
5791         rmax1    = aimax[row];
5792         nrow1    = ailen[row];
5793         low1     = 0;
5794         high1    = nrow1;
5795         lastcol2 = -1;
5796         rp2      = bj + bi[row];
5797         ap2      = ba + bi[row];
5798         rmax2    = bimax[row];
5799         nrow2    = bilen[row];
5800         low2     = 0;
5801         high2    = nrow2;
5802 
5803         for (j=0; j<n; j++) {
5804           if (roworiented) value = v[i*n+j];
5805           else value = v[i+j*m];
5806           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue;
5807           if (in[j] >= cstart && in[j] < cend) {
5808             col = in[j] - cstart;
5809             MatSetValues_SeqAIJ_A_Private(row,col,value,addv);
5810           } else if (in[j] < 0) continue;
5811 #if defined(PETSC_USE_DEBUG)
5812           else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
5813 #endif
5814           else {
5815             if (mat->was_assembled) {
5816               if (!aij->colmap) {
5817                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
5818               }
5819 #if defined(PETSC_USE_CTABLE)
5820               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
5821               col--;
5822 #else
5823               col = aij->colmap[in[j]] - 1;
5824 #endif
5825               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
5826                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
5827                 col  =  in[j];
5828                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
5829                 B     = aij->B;
5830                 b     = (Mat_SeqAIJ*)B->data;
5831                 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
5832                 rp2   = bj + bi[row];
5833                 ap2   = ba + bi[row];
5834                 rmax2 = bimax[row];
5835                 nrow2 = bilen[row];
5836                 low2  = 0;
5837                 high2 = nrow2;
5838                 bm    = aij->B->rmap->n;
5839                 ba    = b->a;
5840               }
5841             } else col = in[j];
5842             MatSetValues_SeqAIJ_B_Private(row,col,value,addv);
5843           }
5844         }
5845       } else if (!aij->donotstash) {
5846         if (roworiented) {
5847           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5848         } else {
5849           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
5850         }
5851       }
5852     }
5853   }
5854   PetscFunctionReturnVoid();
5855 }
5856 
5857